From 18807b29ecb58d45e3ffc4af3a0dc74d7744aef1 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 25 Jun 2019 17:23:12 -0700 Subject: [PATCH 01/56] Implement IDevice backed by Azure Page Blobs --- cs/FASTER.sln | 3 + cs/src/core/Device/AzurePageBlobDevice.cs | 88 +++++++++++++++++++++++ cs/src/core/FASTER.core.csproj | 1 + 3 files changed, 92 insertions(+) create mode 100644 cs/src/core/Device/AzurePageBlobDevice.cs diff --git a/cs/FASTER.sln b/cs/FASTER.sln index a724440c6..9424c9738 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -12,6 +12,9 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "benchmark", "benchmark", "{CA6AB459-A31A-4C15-B1A6-A82C349B54B4}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{81B3B5D1-70F6-4979-AC76-003F9A6B316B}" + ProjectSection(SolutionItems) = preProject + src\core\FASTER.core.nuspec = src\core\FASTER.core.nuspec + EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SumStore", "playground\SumStore\SumStore.csproj", "{05D61B37-9714-4234-9961-384A63F7175E}" EndProject diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs new file mode 100644 index 000000000..8bbed492f --- /dev/null +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -0,0 +1,88 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Collections.Concurrent; +using System.IO; +using System.Threading; +using Microsoft.Azure.Storage; +using Microsoft.Azure.Storage.Blob; + +namespace FASTER.core.Device +{ + public class AzurePageBlobDevice : StorageDeviceBase + { + private CloudBlobContainer container; + private readonly ConcurrentDictionary blobs; + // I don't believe the FileName attribute on the base class is meaningful here. As no external operation depends on its return value. + // Therefore, I am using just the connectionString even though it is not a "file name". 
+ public AzurePageBlobDevice(string connectionString, string containerName, uint sectorSize = 512) : base(connectionString, sectorSize) + { + CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); + CloudBlobClient client = storageAccount.CreateCloudBlobClient(); + container = client.GetContainerReference(containerName); + // TODO(Tianyu): WTF does this even do + container.CreateIfNotExists(); + } + + public override void Close() + { + // From what I can tell from the (nonexistent) documentation, no close operation is requried of page blobs + } + + public override void DeleteSegmentRange(int fromSegment, int toSegment) + { + for (int i = fromSegment; i < toSegment; i++) + { + if (blobs.TryRemove(i, out CloudPageBlob blob)) + { + blob.Delete(); + } + } + } + + public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) + { + CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + + // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API + Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); + NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); + + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength); + + // What do with the return value, or do I just not care? 
+ pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); + } + + public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + { + CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + + // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API + Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); + NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); + pageBlob.BeginWritePages(stream, (long)destinationAddress, null, ar => callback(0, numBytesToWrite, ovNative), asyncResult); + } + + private CloudPageBlob GetOrAddPageBlob(int segmentId) + { + return blobs.GetOrAdd(segmentId, id => CreatePageBlob(id)); + } + + private CloudPageBlob CreatePageBlob(int segmentId) + { + // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? + // TODO(Tianyu): Need a better naming scheme? + CloudPageBlob blob = container.GetPageBlobReference("segment." + segmentId); + // TODO(Tianyu): There does not seem to be an equivalent concept to preallocating in page blobs + // TODO(Tianyu): Also, why the hell is there no CreateIfExists on this thing? This is race-prone if multiple apps are sharing access to an instance + // Maybe I should fix this using leases, but the lease API is just absolute shit and has no documentation. 
+ blob.Create(SectorSize); + return blob; + } + } + + +} diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index c2dbeca27..57157d97d 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -35,6 +35,7 @@ + From db8873eb3f4002caa6dca0ee93edc4a4c155138f Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Thu, 27 Jun 2019 18:11:56 -0700 Subject: [PATCH 02/56] bug fixes --- cs/src/core/Device/AzurePageBlobDevice.cs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 8bbed492f..89761d426 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -8,7 +8,7 @@ using Microsoft.Azure.Storage; using Microsoft.Azure.Storage.Blob; -namespace FASTER.core.Device +namespace FASTER.core { public class AzurePageBlobDevice : StorageDeviceBase { @@ -23,6 +23,7 @@ public AzurePageBlobDevice(string connectionString, string containerName, uint s container = client.GetContainerReference(containerName); // TODO(Tianyu): WTF does this even do container.CreateIfNotExists(); + blobs = new ConcurrentDictionary(); } public override void Close() @@ -49,9 +50,10 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); - UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength); + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength, readLength, FileAccess.Write); - // What do with the return value, or do I just not care? + // TODO(Tianyu): This implementation seems to swallow exceptions that would otherwise be thrown from the synchronous version of this + // function. 
I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); } @@ -76,10 +78,9 @@ private CloudPageBlob CreatePageBlob(int segmentId) // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? // TODO(Tianyu): Need a better naming scheme? CloudPageBlob blob = container.GetPageBlobReference("segment." + segmentId); - // TODO(Tianyu): There does not seem to be an equivalent concept to preallocating in page blobs - // TODO(Tianyu): Also, why the hell is there no CreateIfExists on this thing? This is race-prone if multiple apps are sharing access to an instance - // Maybe I should fix this using leases, but the lease API is just absolute shit and has no documentation. - blob.Create(SectorSize); + // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. + // How that should be fixed is dependent on our decision on the architecture. 
+ blob.Create(segmentSize); return blob; } } From 33bb63dadc37024c2dd9e3623e6eb876614ce9a8 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 28 Jun 2019 10:54:30 -0700 Subject: [PATCH 03/56] fix bug resulting from -1 segment size --- cs/src/core/Device/AzurePageBlobDevice.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 89761d426..222d128c5 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -14,6 +14,10 @@ public class AzurePageBlobDevice : StorageDeviceBase { private CloudBlobContainer container; private readonly ConcurrentDictionary blobs; + + // Azure Page Blobs permit blobs of max size 8 TB + const long MAX_BLOB_SIZE = (long)(8 * 10e12); + // I don't believe the FileName attribute on the base class is meaningful here. As no external operation depends on its return value. // Therefore, I am using just the connectionString even though it is not a "file name". public AzurePageBlobDevice(string connectionString, string containerName, uint sectorSize = 512) : base(connectionString, sectorSize) @@ -78,6 +82,12 @@ private CloudPageBlob CreatePageBlob(int segmentId) // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? // TODO(Tianyu): Need a better naming scheme? CloudPageBlob blob = container.GetPageBlobReference("segment." + segmentId); + + // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because + // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of + // how large it can grow to. + var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; + // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. // How that should be fixed is dependent on our decision on the architecture. 
blob.Create(segmentSize); From b0f7961b3e62ec9d7b88e5f609a2a2b0545b87dc Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 28 Jun 2019 14:05:36 -0700 Subject: [PATCH 04/56] Change blob size --- cs/src/core/Device/AzurePageBlobDevice.cs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 222d128c5..cc3215c5b 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -15,8 +15,8 @@ public class AzurePageBlobDevice : StorageDeviceBase private CloudBlobContainer container; private readonly ConcurrentDictionary blobs; - // Azure Page Blobs permit blobs of max size 8 TB - const long MAX_BLOB_SIZE = (long)(8 * 10e12); + // Azure Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB + const long MAX_BLOB_SIZE = (long)(2 * 10e8); // I don't believe the FileName attribute on the base class is meaningful here. As no external operation depends on its return value. // Therefore, I am using just the connectionString even though it is not a "file name". @@ -25,7 +25,6 @@ public AzurePageBlobDevice(string connectionString, string containerName, uint s CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); CloudBlobClient client = storageAccount.CreateCloudBlobClient(); container = client.GetContainerReference(containerName); - // TODO(Tianyu): WTF does this even do container.CreateIfNotExists(); blobs = new ConcurrentDictionary(); } @@ -90,7 +89,7 @@ private CloudPageBlob CreatePageBlob(int segmentId) // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. // How that should be fixed is dependent on our decision on the architecture. 
- blob.Create(segmentSize); + blob.Create(size); return blob; } } From 5453ae96cc05e910848b3a3848aa2f10ea33b2dd Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 1 Jul 2019 10:48:39 -0700 Subject: [PATCH 05/56] Change the API to match LocalStorageDevice more closely. Convert some tests to also run for AzurePageBlobDevice. --- cs/src/core/Device/AzurePageBlobDevice.cs | 28 +++++++++++---- cs/src/core/Device/Devices.cs | 15 ++++++-- cs/test/BasicDiskFASTERTests.cs | 42 ++++++++++++++--------- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index cc3215c5b..982119dff 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -14,24 +14,38 @@ public class AzurePageBlobDevice : StorageDeviceBase { private CloudBlobContainer container; private readonly ConcurrentDictionary blobs; - + private readonly string blobName; + private readonly bool deleteOnClose; // Azure Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB const long MAX_BLOB_SIZE = (long)(2 * 10e8); + // Azure Page Blobs have a fixed sector size of 512 bytes. + const uint PAGE_BLOB_SECTOR_SIZE = 512; - // I don't believe the FileName attribute on the base class is meaningful here. As no external operation depends on its return value. - // Therefore, I am using just the connectionString even though it is not a "file name". 
- public AzurePageBlobDevice(string connectionString, string containerName, uint sectorSize = 512) : base(connectionString, sectorSize) + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) + : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); CloudBlobClient client = storageAccount.CreateCloudBlobClient(); container = client.GetContainerReference(containerName); container.CreateIfNotExists(); blobs = new ConcurrentDictionary(); - } + this.blobName = blobName; + this.deleteOnClose = deleteOnClose; + } public override void Close() { - // From what I can tell from the (nonexistent) documentation, no close operation is requried of page blobs + // Unlike in LocalStorageDevice, we explicitly remove all page blobs if the deleteOnClose flag is set, instead of relying on the operating system + // to delete files after the end of our process. This leads to potential problems if multiple instances are sharing the same underlying page blobs. + // + // Since this flag is presumably only used for testing though, it is probably fine. + if (deleteOnClose) + { + foreach (var entry in blobs) + { + entry.Value.Delete(); + } + } } public override void DeleteSegmentRange(int fromSegment, int toSegment) @@ -80,7 +94,7 @@ private CloudPageBlob CreatePageBlob(int segmentId) { // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? // TODO(Tianyu): Need a better naming scheme? - CloudPageBlob blob = container.GetPageBlobReference("segment." + segmentId); + CloudPageBlob blob = container.GetPageBlobReference(blobName + segmentId); // If segment size is -1, which denotes absence, we request the largest possible blob. 
This is okay because // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 02b51679e..e98744057 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -8,13 +8,14 @@ namespace FASTER.core { - - /// /// Factory to create FASTER objects /// public static class Devices { + public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; + public const string TEST_CONTAINER = "test"; + /// /// Create a storage device for the log /// @@ -24,6 +25,7 @@ public static class Devices /// Device instance public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false) { + if (string.IsNullOrWhiteSpace(logPath)) return new NullDevice(); @@ -41,5 +43,14 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru } return logDevice; } + + // TODO(Tianyu): How do we want to integrate the option of using AzurePageBlobDevice into the original static factory class? We can either follow the original pattern and somehow encode this in the string path argument, + // or use concrete factories that are initialized per instance to only create one type. + public static IDevice CreateAzurePageBlobDevice(string blobName, string storageString = EMULATED_STORAGE_STRING, string containerName = TEST_CONTAINER, bool deleteOnClose = false) + { + return new AzurePageBlobDevice(storageString, containerName, blobName, deleteOnClose); + } } + + } diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index a84abb206..c819ce083 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -14,33 +14,20 @@ namespace FASTER.test { + // TODO(Tianyu): Now that we are also testing device with Azure Page Blobs here, should we also rename the test? 
[TestFixture] internal class BasicDiskFASTERTests { private FasterKV fht; private IDevice log; - [SetUp] - public void Setup() + void TestDeviceWriteRead(IDevice log) { - log = Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\BasicDiskFASTERTests.log", deleteOnClose: true); + this.log = log; fht = new FasterKV - (1L<<20, new Functions(), new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10 }); + (1L << 20, new Functions(), new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10 }); fht.StartSession(); - } - - [TearDown] - public void TearDown() - { - fht.StopSession(); - fht.Dispose(); - fht = null; - log.Close(); - } - [Test] - public void NativeDiskWriteRead() - { InputStruct input = default(InputStruct); for (int i = 0; i < 2000; i++) @@ -87,5 +74,26 @@ public void NativeDiskWriteRead() } } } + + [TearDown] + public void TearDown() + { + fht.StopSession(); + fht.Dispose(); + fht = null; + log.Close(); + } + + [Test] + public void NativeDiskWriteRead() + { + TestDeviceWriteRead(Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\BasicDiskFASTERTests.log", deleteOnClose: true)); + } + + [Test] + public void PageBlobWriteRead() + { + TestDeviceWriteRead(Devices.CreateAzurePageBlobDevice("BasicDiskFASTERTests", deleteOnClose: false)); + } } } From 3de7755e8a9c694daf718fcfe1ea9b28b0b03b9b Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 08:41:57 -0700 Subject: [PATCH 06/56] Attempt to spin up Azure Storage Emulator on the C# build pipeline --- azure-pipelines.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2abdd0a75..37f283248 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -38,6 +38,12 @@ jobs: platform: '$(buildPlatform)' configuration: '$(buildConfiguration)' + - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' + displayName: 'Init Test Db' + + - 
script: '"C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start' + displayName: 'Start Storage Emulator' + - task: VSTest@2 inputs: testAssemblyVer2: | From 01e59451608cc7177dcbc4c8587e445134e2dbeb Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 12:44:38 -0700 Subject: [PATCH 07/56] Add concept of capacity to devices --- cs/src/core/Device/AzurePageBlobDevice.cs | 4 ++-- cs/src/core/Device/IDevice.cs | 6 ++++++ cs/src/core/Device/LocalStorageDevice.cs | 5 +++-- cs/src/core/Device/ManagedLocalStorageDevice.cs | 5 +++-- cs/src/core/Device/StorageDeviceBase.cs | 16 ++++++++++++---- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 982119dff..7a1a8f6eb 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -21,8 +21,8 @@ public class AzurePageBlobDevice : StorageDeviceBase // Azure Page Blobs have a fixed sector size of 512 bytes. 
const uint PAGE_BLOB_SECTOR_SIZE = 512; - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) - : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, int capacity = -1) + : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE, capacity) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); CloudBlobClient client = storageAccount.CreateCloudBlobClient(); diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index 692724548..f4dc7b6bb 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -21,6 +21,12 @@ public interface IDevice /// string FileName { get; } + /// + /// Returns the maximum capacity of the storage device, in number of bytes. + /// If returned -1, the storage device has no capacity limit. 
+ /// + public int Capacity { get; } + /// /// Initialize device /// diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 5ca46e00b..fbe27d388 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -26,8 +26,9 @@ public class LocalStorageDevice : StorageDeviceBase /// /// /// - public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false) - : base(filename, GetSectorSize(filename)) + /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit + public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, int capacity = -1) + : base(filename, GetSectorSize(filename), capacity) { Native32.EnableProcessPrivileges(); this.preallocateFile = preallocateFile; diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index df1b81a6f..6bdf0dea4 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -27,8 +27,9 @@ public class ManagedLocalStorageDevice : StorageDeviceBase /// /// /// - public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false) - : base(filename, GetSectorSize(filename)) + /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit + public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, int capacity = -1) + : base(filename, GetSectorSize(filename), capacity) { pool = new SectorAlignedBufferPool(1, 1); diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 7b9749858..85ca7314d 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -28,6 +28,11 @@ public abstract class StorageDeviceBase : IDevice 
/// public string FileName { get; } + /// + /// Returns the maximum capacity of the storage device, in number of bytes. + /// If returned -1, the storage device has no capacity limit. + /// + public int Capacity { get; } /// /// Segment size @@ -38,11 +43,12 @@ public abstract class StorageDeviceBase : IDevice private ulong segmentSizeMask; /// - /// + /// Initializes a new StorageDeviceBase /// - /// - /// - public StorageDeviceBase(string filename, uint sectorSize) + /// Name of the file to use + /// The smallest unit of write of the underlying storage device (e.g. 512 bytes for a disk) + /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit + public StorageDeviceBase(string filename, uint sectorSize, int capacity) { FileName = filename; SectorSize = sectorSize; @@ -50,6 +56,8 @@ public StorageDeviceBase(string filename, uint sectorSize) segmentSize = -1; segmentSizeBits = 64; segmentSizeMask = ~0UL; + + Capacity = capacity; } /// From 0be91793b3faa9626c9e0413ae031fdb01801b51 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 13:12:45 -0700 Subject: [PATCH 08/56] Add docs --- cs/src/core/Device/AzurePageBlobDevice.cs | 35 ++++++++++++++++++----- cs/src/core/Device/Devices.cs | 11 +++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 982119dff..8efd86222 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -16,11 +16,23 @@ public class AzurePageBlobDevice : StorageDeviceBase private readonly ConcurrentDictionary blobs; private readonly string blobName; private readonly bool deleteOnClose; - // Azure Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB - const long MAX_BLOB_SIZE = (long)(2 * 10e8); - // Azure Page Blobs have a fixed sector size of 512 bytes. 
- const uint PAGE_BLOB_SECTOR_SIZE = 512; + // Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB + private const long MAX_BLOB_SIZE = (long)(2 * 10e8); + // Azure Page Blobs have a fixed sector size of 512 bytes. + private const uint PAGE_BLOB_SECTOR_SIZE = 512; + + /// + /// A IDevice Implementation that is backed by Azure Page Blob. + /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. + /// + /// The connection string to use when estblishing connection to Azure Blobs + /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created + /// A descriptive name that will be the prefix of all blobs created with this device + /// + /// True if the program should delete all blobs created on call to Close. False otherwise. + /// The container is not deleted even if it was created in this constructor + /// public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) { @@ -33,6 +45,9 @@ public AzurePageBlobDevice(string connectionString, string containerName, string this.deleteOnClose = deleteOnClose; } + /// + /// Inherited + /// public override void Close() { // Unlike in LocalStorageDevice, we explicitly remove all page blobs if the deleteOnClose flag is set, instead of relying on the operating system @@ -47,7 +62,9 @@ public override void Close() } } } - + /// + /// Inherited + /// public override void DeleteSegmentRange(int fromSegment, int toSegment) { for (int i = fromSegment; i < toSegment; i++) @@ -58,7 +75,9 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) } } } - + /// + /// Inherited + /// public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, 
IOCompletionCallback callback, IAsyncResult asyncResult) { CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); @@ -73,7 +92,9 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr // function. I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); } - + /// + /// Inherited + /// public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index e98744057..836b526af 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -46,6 +46,17 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru // TODO(Tianyu): How do we want to integrate the option of using AzurePageBlobDevice into the original static factory class? We can either follow the original pattern and somehow encode this in the string path argument, // or use concrete factories that are initialized per instance to only create one type. + /// + /// Creates a log device backed by Azure Page Blob. + /// + /// A descriptive name that will be the prefix of all blobs created with this device + /// The connection string to use when estblishing connection to Azure Blobs + /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created + /// + /// True if the program should delete all blobs created on call to Close. False otherwise. 
+ /// The container is not deleted even if it was created in this constructor + /// + /// The constructed Device instance public static IDevice CreateAzurePageBlobDevice(string blobName, string storageString = EMULATED_STORAGE_STRING, string containerName = TEST_CONTAINER, bool deleteOnClose = false) { return new AzurePageBlobDevice(storageString, containerName, blobName, deleteOnClose); From f84e0a9eaf86abdacedf65d4861cd9e00d56585f Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 13:23:17 -0700 Subject: [PATCH 09/56] Download storage emulator at the start of the pipeline instead. --- azure-pipelines.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 37f283248..f7070d7aa 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -37,9 +37,15 @@ jobs: solution: '$(solution)' platform: '$(buildPlatform)' configuration: '$(buildConfiguration)' + + - powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' + displayName: 'Download Azure Storage Emulator' - - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' - displayName: 'Init Test Db' + - powershell: 'msiexec /quiet /a .\azure-storage-emulator.msi' + displayName: 'Install Azure Storage Emulator' + + #- script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' + # displayName: 'Init Test Db' - script: '"C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start' displayName: 'Start Storage Emulator' From e651ec81259827b8437dd43a53e9a1c64b696df3 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 13:34:43 -0700 Subject: [PATCH 10/56] Force reinstall --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f7070d7aa..7b2beafb0 
100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,7 +41,7 @@ jobs: - powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' displayName: 'Download Azure Storage Emulator' - - powershell: 'msiexec /quiet /a .\azure-storage-emulator.msi' + - powershell: 'msiexec /quiet /a .\azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' displayName: 'Install Azure Storage Emulator' #- script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' From f5e4dbd7c80b07641d644d2e7eb1748c23c0af32 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 13:41:39 -0700 Subject: [PATCH 11/56] Re-enable SQL server initialization to make storage simulator happy --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7b2beafb0..ba99c3fb3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -44,8 +44,8 @@ jobs: - powershell: 'msiexec /quiet /a .\azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' displayName: 'Install Azure Storage Emulator' - #- script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' - # displayName: 'Init Test Db' + - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' + displayName: 'Init Test Db' - script: '"C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start' displayName: 'Start Storage Emulator' From b7673f2050075d83f8dfed7beecf8ac0533c6885 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 14:14:58 -0700 Subject: [PATCH 12/56] WAT --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ba99c3fb3..f53167d43 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,7 +41,7 @@ jobs: - 
powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' displayName: 'Download Azure Storage Emulator' - - powershell: 'msiexec /quiet /a .\azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' + - powershell: 'msiexec /passive /a azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' displayName: 'Install Azure Storage Emulator' - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' From e3c75fd0e90e58468a0705989380cb77a97cfdd2 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 14:25:41 -0700 Subject: [PATCH 13/56] Fix all warnings. Add a step to (hopefully) remove old versions --- azure-pipelines.yml | 3 +++ cs/src/core/Device/AzurePageBlobDevice.cs | 7 +++++-- cs/src/core/Device/Devices.cs | 8 ++++---- cs/src/core/Device/LocalStorageDevice.cs | 10 ++++++++++ 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f53167d43..1a10b3278 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,6 +41,9 @@ jobs: - powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' displayName: 'Download Azure Storage Emulator' + - powershell: 'msiexec /passive /uninstall azure-storage-emulator.msi' + displayName: 'Remove old Azure Storage Emulator' + - powershell: 'msiexec /passive /a azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' displayName: 'Install Azure Storage Emulator' diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 8efd86222..30ffea0f4 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -10,6 +10,10 @@ namespace FASTER.core { + /// + /// A IDevice Implementation that is backed byAzure Page Blob. 
+ /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. + /// public class AzurePageBlobDevice : StorageDeviceBase { private CloudBlobContainer container; @@ -23,8 +27,7 @@ public class AzurePageBlobDevice : StorageDeviceBase private const uint PAGE_BLOB_SECTOR_SIZE = 512; /// - /// A IDevice Implementation that is backed by Azure Page Blob. - /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. + /// Constructs a new AzurePageBlobDevice instance /// /// The connection string to use when estblishing connection to Azure Blobs /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 836b526af..ec33f12be 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -13,8 +13,8 @@ namespace FASTER.core /// public static class Devices { - public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; - public const string TEST_CONTAINER = "test"; + private const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; + private const string TEST_CONTAINER = "test"; /// /// Create a storage device for the log @@ -57,9 +57,9 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru /// The container is not deleted even if it was created in this constructor /// /// The constructed Device instance - public static IDevice CreateAzurePageBlobDevice(string blobName, string storageString = EMULATED_STORAGE_STRING, string containerName = TEST_CONTAINER, bool deleteOnClose = false) + public static IDevice CreateAzurePageBlobDevice(string blobName, string connectionString = EMULATED_STORAGE_STRING, string containerName = TEST_CONTAINER, bool deleteOnClose = false) { - return new 
AzurePageBlobDevice(storageString, containerName, blobName, deleteOnClose); + return new AzurePageBlobDevice(connectionString, containerName, blobName, deleteOnClose); } } diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 5ca46e00b..3cdbd40f1 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -153,11 +153,21 @@ public override void Close() logHandle.Dispose(); } + /// + /// + /// + /// + /// protected string GetSegmentName(int segmentId) { return FileName + "." + segmentId; } + /// + /// + /// + /// + /// // Can be used to pre-load handles, e.g., after a checkpoint protected SafeFileHandle GetOrAddHandle(int _segmentId) { From 27cb31534f0711d45b34a17246902035a6acaa34 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 14:33:09 -0700 Subject: [PATCH 14/56] Attempt to install to alternative directory --- azure-pipelines.yml | 7 ++----- cs/src/core/Device/Devices.cs | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1a10b3278..944348828 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,16 +41,13 @@ jobs: - powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' displayName: 'Download Azure Storage Emulator' - - powershell: 'msiexec /passive /uninstall azure-storage-emulator.msi' - displayName: 'Remove old Azure Storage Emulator' - - - powershell: 'msiexec /passive /a azure-storage-emulator.msi REINSTALL=ALL REINSTALLMODE=A' + - powershell: 'msiexec /passive /a azure-storage-emulator.msi TARGETDIR="C:\storage-emulator"' displayName: 'Install Azure Storage Emulator' - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' displayName: 'Init Test Db' - - script: '"C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" 
start' + - script: '"C:\storage-emulator\AzureStorageEmulator.exe" start' displayName: 'Start Storage Emulator' - task: VSTest@2 diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index ec33f12be..347157694 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -53,7 +53,7 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru /// The connection string to use when estblishing connection to Azure Blobs /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created /// - /// True if the program should delete all blobs created on call to Close. False otherwise. + /// True if the program should delete all blobs created on call to Close. False otherwise. /// The container is not deleted even if it was created in this constructor /// /// The constructed Device instance From 8915d0b5562b9bac3eb19728bcebb6c1e2073a94 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 14:42:50 -0700 Subject: [PATCH 15/56] attempt to dump log and see what's wrong --- azure-pipelines.yml | 5 ++++- cs/src/core/Device/Devices.cs | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 944348828..6314e7864 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,9 +41,12 @@ jobs: - powershell: 'Invoke-WebRequest -OutFile azure-storage-emulator.msi -Uri "https://go.microsoft.com/fwlink/?LinkId=717179&clcid=0x409"' displayName: 'Download Azure Storage Emulator' - - powershell: 'msiexec /passive /a azure-storage-emulator.msi TARGETDIR="C:\storage-emulator"' + - powershell: 'msiexec /passive /lvx installation.log /a azure-storage-emulator.msi TARGETDIR="C:\storage-emulator"' displayName: 'Install Azure Storage Emulator' + - powershell: 'cat installation.log' + displayName: 'Dump log' + - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 
-s' displayName: 'Init Test Db' diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 347157694..68fb5a02a 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -53,7 +53,7 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru /// The connection string to use when estblishing connection to Azure Blobs /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created /// - /// True if the program should delete all blobs created on call to Close. False otherwise. + /// True if the program should delete all blobs created on call to Close. False otherwise. /// The container is not deleted even if it was created in this constructor /// /// The constructed Device instance From ba9236dcaef3f6b5986d0d42faada55439c5ed0b Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 14:52:52 -0700 Subject: [PATCH 16/56] Correct installation path. --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6314e7864..37831f44f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -50,7 +50,7 @@ jobs: - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' displayName: 'Init Test Db' - - script: '"C:\storage-emulator\AzureStorageEmulator.exe" start' + - script: '"C:\storage-emulator\root\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start' displayName: 'Start Storage Emulator' - task: VSTest@2 From 636441a5b1b11fc7455364eb5d22e6220b1ccfa7 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 15:04:33 -0700 Subject: [PATCH 17/56] Remove debug log step. 
--- azure-pipelines.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 37831f44f..2237f2472 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -44,9 +44,6 @@ jobs: - powershell: 'msiexec /passive /lvx installation.log /a azure-storage-emulator.msi TARGETDIR="C:\storage-emulator"' displayName: 'Install Azure Storage Emulator' - - powershell: 'cat installation.log' - displayName: 'Dump log' - - script: '"C:\Program Files\Microsoft SQL Server\130\Tools\Binn\SqlLocalDB.exe" create "v13.0" 13.0 -s' displayName: 'Init Test Db' From d1ff3619cd4dc844a973200a87122303f0c64284 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 2 Jul 2019 15:10:03 -0700 Subject: [PATCH 18/56] Add skeleton implementation for tiered storage device --- cs/src/core/Device/TieredStorageDevice.cs | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 cs/src/core/Device/TieredStorageDevice.cs diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs new file mode 100644 index 000000000..230f065c8 --- /dev/null +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -0,0 +1,35 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading; + +namespace FASTER.core.Device +{ + class TieredStorageDevice : StorageDeviceBase + { + private readonly IList devices; + private readonly uint commitPoint; + + public TieredStorageDevice() : base("", 512, -1) {} + + public override void Close() + { + throw new NotImplementedException(); + } + + public override void DeleteSegmentRange(int fromSegment, int toSegment) + { + throw new NotImplementedException(); + } + + public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) + { + throw new NotImplementedException(); + } + + public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong 
destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + { + throw new NotImplementedException(); + } + } +} From b352832434474d7a1023f3160fda94a18110ed90 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 3 Jul 2019 14:17:56 -0700 Subject: [PATCH 19/56] Implement TieredStorageDevice with inclusive policy and no range shifting --- cs/src/core/Device/AzurePageBlobDevice.cs | 4 +- cs/src/core/Device/IDevice.cs | 7 +- cs/src/core/Device/LocalStorageDevice.cs | 4 +- .../core/Device/ManagedLocalStorageDevice.cs | 4 +- cs/src/core/Device/NullDevice.cs | 2 +- cs/src/core/Device/StorageDeviceBase.cs | 15 ++- cs/src/core/Device/TieredStorageDevice.cs | 124 +++++++++++++++++- 7 files changed, 140 insertions(+), 20 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 0f438e8de..5f07a8b3e 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -36,8 +36,8 @@ public class AzurePageBlobDevice : StorageDeviceBase /// True if the program should delete all blobs created on call to Close. False otherwise. 
/// The container is not deleted even if it was created in this constructor /// - /// The maximum number of bytes this storage device can accommondate, or -1 if there is no such limit - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, int capacity = -1) + /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, ulong capacity = CAPACITY_UNSPECIFIED) : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE, capacity) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index f4dc7b6bb..d362026ec 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -23,9 +23,9 @@ public interface IDevice /// /// Returns the maximum capacity of the storage device, in number of bytes. - /// If returned -1, the storage device has no capacity limit. + /// If returned CAPACITY_UNSPECIFIED, the storage device has no specfied capacity limit. 
/// - public int Capacity { get; } + ulong Capacity { get; } /// /// Initialize device @@ -33,9 +33,8 @@ public interface IDevice /// void Initialize(long segmentSize); - + /* Segmented addressing API */ - /// /// Write /// diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 3ac3c4a44..c604e2e05 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -26,8 +26,8 @@ public class LocalStorageDevice : StorageDeviceBase /// /// /// - /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit - public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, int capacity = -1) + /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, ulong capacity = CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), capacity) { Native32.EnableProcessPrivileges(); diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 6bdf0dea4..c3804338f 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -27,8 +27,8 @@ public class ManagedLocalStorageDevice : StorageDeviceBase /// /// /// - /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit - public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, int capacity = -1) + /// The maximal number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, ulong capacity = CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), 
capacity) { pool = new SectorAlignedBufferPool(1, 1); diff --git a/cs/src/core/Device/NullDevice.cs b/cs/src/core/Device/NullDevice.cs index 0b5b66a54..1cab439af 100644 --- a/cs/src/core/Device/NullDevice.cs +++ b/cs/src/core/Device/NullDevice.cs @@ -14,7 +14,7 @@ public class NullDevice : StorageDeviceBase /// /// /// - public NullDevice() : base("null", 512) + public NullDevice() : base("null", 512, CAPACITY_UNSPECIFIED) { } diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 85ca7314d..0438502d3 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -18,6 +18,12 @@ namespace FASTER.core /// public abstract class StorageDeviceBase : IDevice { + + /// + /// This value is supplied for capacity when the device does not have a specified limit. + /// + public const ulong CAPACITY_UNSPECIFIED = ulong.MaxValue; + /// /// /// @@ -32,7 +38,7 @@ public abstract class StorageDeviceBase : IDevice /// Returns the maximum capacity of the storage device, in number of bytes. /// If returned -1, the storage device has no capacity limit. /// - public int Capacity { get; } + public ulong Capacity { get; } /// /// Segment size @@ -47,8 +53,8 @@ public abstract class StorageDeviceBase : IDevice /// /// Name of the file to use /// The smallest unit of write of the underlying storage device (e.g. 
512 bytes for a disk) - /// The maximal number of bytes this storage device can accommondate, or -1 if there is no such limit - public StorageDeviceBase(string filename, uint sectorSize, int capacity) + /// The maximal number of bytes this storage device can accommondate, or CAPAPCITY_UNSPECIFIED if there is no such limit + public StorageDeviceBase(string filename, uint sectorSize, ulong capacity) { FileName = filename; SectorSize = sectorSize; @@ -66,6 +72,9 @@ public StorageDeviceBase(string filename, uint sectorSize, int capacity) /// public void Initialize(long segmentSize) { + // TODO(Tianyu): Alternatively, we can adjust capacity based on the segment size: given a phsyical upper limit of capacity, + // we only make use of (Capacity / segmentSize * segmentSize) many bytes. + Debug.Assert(Capacity % segmentSize == 0, "capacity must be a multiple of segment sizes"); this.segmentSize = segmentSize; if (!Utility.IsPowerOfTwo(segmentSize)) { diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 230f065c8..9e421cef0 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using System.Diagnostics; using System.Threading; namespace FASTER.core.Device @@ -8,28 +9,139 @@ namespace FASTER.core.Device class TieredStorageDevice : StorageDeviceBase { private readonly IList devices; - private readonly uint commitPoint; + private readonly int commitPoint; + // Because it is assumed that tiers are inclusive with one another, we only need to store the starting address of the log portion avialable on each tier. + // That implies this list is sorted in descending order with the last tier being 0 always. + private readonly ulong[] tierStartAddresses; - public TieredStorageDevice() : base("", 512, -1) {} + // TODO(Tianyu): So far, I don't believe sector size is used anywhere in the code. 
Therefore I am not reasoning about what the + // sector size of a tiered storage should be when different tiers can have different sector sizes. + /// + /// Constructs a new TieredStorageDevice composed of the given devices. + /// + /// + /// List of devices to be used. The list should be given in order of hot to cold. Read is served from the + /// device with smallest index in the list that has the requested data + /// + /// + public TieredStorageDevice(int commitPoint, IList devices) : base(ComputeFileString(devices, commitPoint), 512, ComputeCapacity(devices)) + { + Debug.Assert(commitPoint >= 0 && commitPoint < devices.Count, "commit point is out of range"); + this.devices = devices; + this.commitPoint = commitPoint; + tierStartAddresses = Array.CreateInstance(typeof(IDevice), devices.Count); + tierStartAddresses.Initialize(); + } + + public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(commitPoint, (IList)devices) + { + } public override void Close() { - throw new NotImplementedException(); + foreach (IDevice device in devices) + { + device.Close(); + } + } + + public override void DeleteAddressRange(long fromAddress, long toAddress) + { + // TODO(Tianyu): concurrency + int fromStartTier = FindClosestDeviceContaining(fromAddress); + int toStartTier = FindClosestDeviceContaining(toAddress); + for (int i = fromStartTier; i < toStartTier; i++) + { + // Because our tiered storage is inclusive, + devices[i].DeleteAddressRange(Math.Max(fromAddress, tierStartAddresses[i]), toAddress); + } } public override void DeleteSegmentRange(int fromSegment, int toSegment) { - throw new NotImplementedException(); + throw new NotSupportedException(); + } + + public override void ReadAsync(ulong alignedSourceAddress, IntPtr aligneDestinationAddress, uint alignedReadLength, IOCompletionCallback callback, IAsyncResult asyncResulte) + { + // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to writes, which may trigger a change 
in start address. + IDevice closestDevice = devices[FindClosestDeviceContaining(alignedSoourceAddress)]; + // We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not + // be present for certain devices. + closestDevice.ReadAsync(alignedSourceAddress, alignedDestinationAddress, alignedReadLength, callback, asyncResulte); } public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { - throw new NotImplementedException(); + // If it is not guaranteed that all underlying tiers agree on a segment size, this API cannot have a meaningful implementation + throw new NotSupportedException(); + } + + public override void WriteAsync(IntPtr sourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + { + int startTier = FindClosestDeviceContaining(alignedDestinationAddress); + // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. + Debug.Assert(startTier >= commitPoint, "Write should not elide the commit point"); + // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to reads. + for (int i = startTier; i < devices.Count; i++) + { + if (i == commitPoint) + { + // Only if the write is complete on the commit point should we invoke the call back. 
+ devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, callback, asyncResult); + } + else + { + // Otherwise, simply issue the write without caring about callbacks + devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => { }, null); + } + } } public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - throw new NotImplementedException(); + // If it is not guaranteed that all underlying tiers agree on a segment size, this API cannot have a meaningful implementation + throw new NotSupportedException(); + } + + private static ulong ComputeCapacity(IList devices) + { + ulong result = 0; + // The capacity of a tiered storage device is the sum of the capacity of its tiers + foreach (IDevice device in devices) + { + // Unless the last tier device has unspecified storage capacity, in which case the tiered storage also has unspecified capacity + if (device.Capacity == CAPACITY_UNSPECIFIED) + { + // TODO(Tianyu): Is this assumption too strong? + Debug.Assert(device == devices[devices.Count - 1], "Only the last tier storage of a tiered storage device can have unspecified capacity"); + return CAPACITY_UNSPECIFIED; + } + result += device.Capacity; + } + return result; + } + + // TODO(Tianyu): Is the notion of file name still relevant in a tiered storage device? + private static string ComputeFileString(IList devices, int commitPoint) + { + StringBuilder result = new StringBuilder(); + foreach (IDevice device in devices) + { + result.AppendFormat("{0}, file name {1}, capacity {2} bytes;", device.GetType().Name, device.FileName, device.Capacity == CAPACITY_UNSPECIFIED ? 
"unspecified" : device.Capacity.ToString()); + } + result.AppendFormat("commit point: {0} at tier {1}", devices[commitPoint].GetType().Name, commitPoint); + return result.ToString(); + } + + private int FindClosestDeviceContaining(ulong address) + { + // TODO(Tianyu): Will linear search be faster for small number of tiers (which would be the common case)? + // binary search where the array is sorted in reverse order to the default ulong comparator + int tier = Array.BinarySearch(tierStartAddresses, 0, tierStartAddresses.Length, alignedStartAddress, (x, y) => y.CompareTo(x)); + // Binary search returns either the index or bitwise complement of the index of the first element smaller than start address. + // We want the first element with start address smaller than given address. + return tier >= 0 ? ++tier : ~tier; } } } From f277d617ca93982407e701ee2f49bf213ef2ec8c Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 5 Jul 2019 10:32:21 -0700 Subject: [PATCH 20/56] fix compilation --- cs/src/core/Device/AzurePageBlobDevice.cs | 2 +- cs/src/core/Device/IDevice.cs | 2 +- cs/src/core/Device/LocalStorageDevice.cs | 2 +- .../core/Device/ManagedLocalStorageDevice.cs | 2 +- cs/src/core/Device/StorageDeviceBase.cs | 12 +++++------ cs/src/core/Device/TieredStorageDevice.cs | 20 +++++++++---------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 5f07a8b3e..0832e5379 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -37,7 +37,7 @@ public class AzurePageBlobDevice : StorageDeviceBase /// The container is not deleted even if it was created in this constructor /// /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, ulong capacity = 
CAPACITY_UNSPECIFIED) + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE, capacity) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index d362026ec..72edb58f8 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -25,7 +25,7 @@ public interface IDevice /// Returns the maximum capacity of the storage device, in number of bytes. /// If returned CAPACITY_UNSPECIFIED, the storage device has no specfied capacity limit. /// - ulong Capacity { get; } + long Capacity { get; } /// /// Initialize device diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index c604e2e05..664ac1524 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -27,7 +27,7 @@ public class LocalStorageDevice : StorageDeviceBase /// /// /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, ulong capacity = CAPACITY_UNSPECIFIED) + public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), capacity) { Native32.EnableProcessPrivileges(); diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index c3804338f..bf0ffc667 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -28,7 +28,7 @@ public class ManagedLocalStorageDevice : StorageDeviceBase /// /// /// The maximal number of bytes 
this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, ulong capacity = CAPACITY_UNSPECIFIED) + public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), capacity) { pool = new SectorAlignedBufferPool(1, 1); diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 0438502d3..630d9a8c0 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -22,7 +22,7 @@ public abstract class StorageDeviceBase : IDevice /// /// This value is supplied for capacity when the device does not have a specified limit. /// - public const ulong CAPACITY_UNSPECIFIED = ulong.MaxValue; + public const long CAPACITY_UNSPECIFIED = long.MaxValue; /// /// @@ -38,7 +38,7 @@ public abstract class StorageDeviceBase : IDevice /// Returns the maximum capacity of the storage device, in number of bytes. /// If returned -1, the storage device has no capacity limit. /// - public ulong Capacity { get; } + public long Capacity { get; } /// /// Segment size @@ -54,7 +54,7 @@ public abstract class StorageDeviceBase : IDevice /// Name of the file to use /// The smallest unit of write of the underlying storage device (e.g. 
512 bytes for a disk) /// The maximal number of bytes this storage device can accommondate, or CAPAPCITY_UNSPECIFIED if there is no such limit - public StorageDeviceBase(string filename, uint sectorSize, ulong capacity) + public StorageDeviceBase(string filename, uint sectorSize, long capacity) { FileName = filename; SectorSize = sectorSize; @@ -98,7 +98,7 @@ public void Initialize(long segmentSize) /// /// /// - public void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + public virtual void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { var segment = segmentSizeBits < 64 ? alignedDestinationAddress >> segmentSizeBits : 0; WriteAsync( @@ -116,7 +116,7 @@ public void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddr /// /// /// - public void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult) + public virtual void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult) { var segment = segmentSizeBits < 64 ? alignedSourceAddress >> segmentSizeBits : 0; @@ -132,7 +132,7 @@ public void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddre /// /// /// - public void DeleteAddressRange(long fromAddress, long toAddress) + public virtual void DeleteAddressRange(long fromAddress, long toAddress) { var fromSegment = segmentSizeBits < 64 ? fromAddress >> segmentSizeBits : 0; var toSegment = segmentSizeBits < 64 ? 
toAddress >> segmentSizeBits : 0; diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 9e421cef0..7bebfdcdd 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -29,7 +29,7 @@ public TieredStorageDevice(int commitPoint, IList devices) : base(Compu Debug.Assert(commitPoint >= 0 && commitPoint < devices.Count, "commit point is out of range"); this.devices = devices; this.commitPoint = commitPoint; - tierStartAddresses = Array.CreateInstance(typeof(IDevice), devices.Count); + tierStartAddresses = (ulong[])Array.CreateInstance(typeof(ulong), devices.Count); tierStartAddresses.Initialize(); } @@ -48,12 +48,12 @@ public override void Close() public override void DeleteAddressRange(long fromAddress, long toAddress) { // TODO(Tianyu): concurrency - int fromStartTier = FindClosestDeviceContaining(fromAddress); - int toStartTier = FindClosestDeviceContaining(toAddress); + int fromStartTier = FindClosestDeviceContaining((ulong)fromAddress); + int toStartTier = FindClosestDeviceContaining((ulong)toAddress); for (int i = fromStartTier; i < toStartTier; i++) { // Because our tiered storage is inclusive, - devices[i].DeleteAddressRange(Math.Max(fromAddress, tierStartAddresses[i]), toAddress); + devices[i].DeleteAddressRange((long)Math.Max((ulong)fromAddress, tierStartAddresses[i]), toAddress); } } @@ -62,10 +62,10 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) throw new NotSupportedException(); } - public override void ReadAsync(ulong alignedSourceAddress, IntPtr aligneDestinationAddress, uint alignedReadLength, IOCompletionCallback callback, IAsyncResult asyncResulte) + public override void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint alignedReadLength, IOCompletionCallback callback, IAsyncResult asyncResulte) { // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to writes, which may 
trigger a change in start address. - IDevice closestDevice = devices[FindClosestDeviceContaining(alignedSoourceAddress)]; + IDevice closestDevice = devices[FindClosestDeviceContaining(alignedSourceAddress)]; // We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not // be present for certain devices. closestDevice.ReadAsync(alignedSourceAddress, alignedDestinationAddress, alignedReadLength, callback, asyncResulte); @@ -77,7 +77,7 @@ public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destin throw new NotSupportedException(); } - public override void WriteAsync(IntPtr sourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { int startTier = FindClosestDeviceContaining(alignedDestinationAddress); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. @@ -104,9 +104,9 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti throw new NotSupportedException(); } - private static ulong ComputeCapacity(IList devices) + private static long ComputeCapacity(IList devices) { - ulong result = 0; + long result = 0; // The capacity of a tiered storage device is the sum of the capacity of its tiers foreach (IDevice device in devices) { @@ -138,7 +138,7 @@ private int FindClosestDeviceContaining(ulong address) { // TODO(Tianyu): Will linear search be faster for small number of tiers (which would be the common case)? 
// binary search where the array is sorted in reverse order to the default ulong comparator - int tier = Array.BinarySearch(tierStartAddresses, 0, tierStartAddresses.Length, alignedStartAddress, (x, y) => y.CompareTo(x)); + int tier = Array.BinarySearch(tierStartAddresses, 0, tierStartAddresses.Length, address, Comparer.Create((x, y) => y.CompareTo(x))); // Binary search returns either the index or bitwise complement of the index of the first element smaller than start address. // We want the first element with start address smaller than given address. return tier >= 0 ? ++tier : ~tier; From 15229118232e0729e506d88c8e9f5d8b9958cf45 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 5 Jul 2019 14:32:17 -0700 Subject: [PATCH 21/56] Draft about how updating of storage range might work. --- cs/src/core/Device/TieredStorageDevice.cs | 57 +++++++++++++++++++---- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 7bebfdcdd..3b0f0e4de 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -10,9 +10,13 @@ class TieredStorageDevice : StorageDeviceBase { private readonly IList devices; private readonly int commitPoint; + // TODO(Tianyu): For some retarded reason Interlocked provides no CompareExchange for unsigned primitives. // Because it is assumed that tiers are inclusive with one another, we only need to store the starting address of the log portion avialable on each tier. // That implies this list is sorted in descending order with the last tier being 0 always. - private readonly ulong[] tierStartAddresses; + private readonly long[] tierStartAddresses; + // Because the device has no access to in-memory log tail information, we need to keep track of that ourselves. Currently this is done by keeping a high-water + // mark of the addresses seen in the WriteAsyncMethod. 
+ private long logHead; // TODO(Tianyu): So far, I don't believe sector size is used anywhere in the code. Therefore I am not reasoning about what the // sector size of a tiered storage should be when different tiers can have different sector sizes. @@ -24,13 +28,16 @@ class TieredStorageDevice : StorageDeviceBase /// device with smallest index in the list that has the requested data /// /// + // TODO(Tianyu): Recovering from a tiered device is potentially difficult, because we also need to recover their respective ranges. public TieredStorageDevice(int commitPoint, IList devices) : base(ComputeFileString(devices, commitPoint), 512, ComputeCapacity(devices)) { Debug.Assert(commitPoint >= 0 && commitPoint < devices.Count, "commit point is out of range"); this.devices = devices; this.commitPoint = commitPoint; - tierStartAddresses = (ulong[])Array.CreateInstance(typeof(ulong), devices.Count); + tierStartAddresses = (long[])Array.CreateInstance(typeof(long), devices.Count); tierStartAddresses.Initialize(); + // TODO(Tianyu): Change after figuring out how to deal with recovery. 
+ logHead = 0; } public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(commitPoint, (IList)devices) @@ -48,12 +55,12 @@ public override void Close() public override void DeleteAddressRange(long fromAddress, long toAddress) { // TODO(Tianyu): concurrency - int fromStartTier = FindClosestDeviceContaining((ulong)fromAddress); - int toStartTier = FindClosestDeviceContaining((ulong)toAddress); + int fromStartTier = FindClosestDeviceContaining(fromAddress); + int toStartTier = FindClosestDeviceContaining(toAddress); for (int i = fromStartTier; i < toStartTier; i++) { // Because our tiered storage is inclusive, - devices[i].DeleteAddressRange((long)Math.Max((ulong)fromAddress, tierStartAddresses[i]), toAddress); + devices[i].DeleteAddressRange((long)Math.Max(fromAddress, tierStartAddresses[i]), toAddress); } } @@ -65,7 +72,7 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) public override void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint alignedReadLength, IOCompletionCallback callback, IAsyncResult asyncResulte) { // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to writes, which may trigger a change in start address. - IDevice closestDevice = devices[FindClosestDeviceContaining(alignedSourceAddress)]; + IDevice closestDevice = devices[FindClosestDeviceContaining((long)alignedSourceAddress)]; // We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not // be present for certain devices. 
closestDevice.ReadAsync(alignedSourceAddress, alignedDestinationAddress, alignedReadLength, callback, asyncResulte); @@ -79,10 +86,16 @@ public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destin public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - int startTier = FindClosestDeviceContaining(alignedDestinationAddress); + long writeHead = (long)alignedDestinationAddress + numBytesToWrite; + // TODO(Tianyu): Think more carefully about how this can interleave. + UpdateLogHead(writeHead); + for (int i = 0; i < devices.Count; i++) + { + UpdateDeviceRange(i, writeHead); + } + int startTier = FindClosestDeviceContaining((long)alignedDestinationAddress); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. Debug.Assert(startTier >= commitPoint, "Write should not elide the commit point"); - // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to reads. for (int i = startTier; i < devices.Count; i++) { if (i == commitPoint) @@ -134,7 +147,7 @@ private static string ComputeFileString(IList devices, int commitPoint) return result.ToString(); } - private int FindClosestDeviceContaining(ulong address) + private int FindClosestDeviceContaining(long address) { // TODO(Tianyu): Will linear search be faster for small number of tiers (which would be the common case)? // binary search where the array is sorted in reverse order to the default ulong comparator @@ -143,5 +156,31 @@ private int FindClosestDeviceContaining(ulong address) // We want the first element with start address smaller than given address. return tier >= 0 ? 
++tier : ~tier; } + + private void UpdateLogHead(long writeHead) + { + long logHeadLocal; + do + { + logHeadLocal = logHead; + if (logHeadLocal >= writeHead) return; + } while (logHeadLocal != Interlocked.CompareExchange(ref logHead, writeHead, logHeadLocal)); + } + + private void UpdateDeviceRange(int tier, long writeHead) + { + IDevice device = devices[tier]; + // Never need to update range if storage is unbounded + if (device.Capacity == CAPACITY_UNSPECIFIED) return; + + long oldLogTail = tierStartAddresses[tier]; + if (writeHead - oldLogTail > device.Capacity) + { + long newLogTail = writeHead - oldLogTail - device.Capacity; + tierStartAddresses[tier] = newLogTail; + // TODO(Tianyu): There will be a race here with readers. Epoch protection? + device.DeleteAddressRange(oldLogTail, newLogTail); + } + } } } From f685971bdaed88e94ca819d69df157198941648b Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 8 Jul 2019 08:45:55 -0700 Subject: [PATCH 22/56] fix wrong assert --- cs/src/core/Device/StorageDeviceBase.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 630d9a8c0..f257a0814 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -22,7 +22,7 @@ public abstract class StorageDeviceBase : IDevice /// /// This value is supplied for capacity when the device does not have a specified limit. /// - public const long CAPACITY_UNSPECIFIED = long.MaxValue; + public const long CAPACITY_UNSPECIFIED = -1; /// /// @@ -74,7 +74,7 @@ public void Initialize(long segmentSize) { // TODO(Tianyu): Alternatively, we can adjust capacity based on the segment size: given a phsyical upper limit of capacity, // we only make use of (Capacity / segmentSize * segmentSize) many bytes. 
- Debug.Assert(Capacity % segmentSize == 0, "capacity must be a multiple of segment sizes"); + Debug.Assert(Capacity == -1 || Capacity % segmentSize == 0, "capacity must be a multiple of segment sizes"); this.segmentSize = segmentSize; if (!Utility.IsPowerOfTwo(segmentSize)) { From c3b2d0cac53b351cb3fabd3d34343f3223d5d3bb Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 8 Jul 2019 10:14:57 -0700 Subject: [PATCH 23/56] Fix some bugs and add a simple test case --- cs/src/core/Device/Devices.cs | 9 ++++--- cs/src/core/Device/StorageDeviceBase.cs | 2 +- cs/src/core/Device/TieredStorageDevice.cs | 29 ++++++++++++++++------- cs/test/BasicDiskFASTERTests.cs | 10 ++++++++ 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 68fb5a02a..90f964b8f 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -13,6 +13,8 @@ namespace FASTER.core /// public static class Devices { + // TODO(Tianyu): Should I just move the constant from storage device base here? 
+ public const long CAPACITY_UNSPECIFIED = StorageDeviceBase.CAPACITY_UNSPECIFIED; private const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; private const string TEST_CONTAINER = "test"; @@ -22,8 +24,9 @@ public static class Devices /// Path to file that will store the log (empty for null device) /// Whether we try to preallocate the file on creation /// Delete files on close + /// /// Device instance - public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false) + public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) { if (string.IsNullOrWhiteSpace(logPath)) @@ -34,12 +37,12 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru #if DOTNETCORE if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - logDevice = new ManagedLocalStorageDevice(logPath, preallocateFile, deleteOnClose); + logDevice = new ManagedLocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity); } else #endif { - logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose); + logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity); } return logDevice; } diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index f257a0814..8c696a1b8 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -70,7 +70,7 @@ public StorageDeviceBase(string filename, uint sectorSize, long capacity) /// Initialize device /// /// - public void Initialize(long segmentSize) + public virtual void Initialize(long segmentSize) { // TODO(Tianyu): Alternatively, we can adjust capacity based on the segment size: given a phsyical upper limit of capacity, // we only make use of (Capacity / segmentSize * segmentSize) many bytes. 
diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 3b0f0e4de..dcb7d85b7 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -4,7 +4,7 @@ using System.Diagnostics; using System.Threading; -namespace FASTER.core.Device +namespace FASTER.core { class TieredStorageDevice : StorageDeviceBase { @@ -32,6 +32,7 @@ class TieredStorageDevice : StorageDeviceBase public TieredStorageDevice(int commitPoint, IList devices) : base(ComputeFileString(devices, commitPoint), 512, ComputeCapacity(devices)) { Debug.Assert(commitPoint >= 0 && commitPoint < devices.Count, "commit point is out of range"); + // TODO(Tianyu): Should assert that passed in devices are not yet initialized. This is more challenging for recovering. this.devices = devices; this.commitPoint = commitPoint; tierStartAddresses = (long[])Array.CreateInstance(typeof(long), devices.Count); @@ -44,10 +45,19 @@ public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(com { } + public override void Initialize(long segmentSize) + { + foreach (IDevice devices in devices) + { + devices.Initialize(segmentSize); + } + } + public override void Close() { foreach (IDevice device in devices) { + // TODO(Tianyu): All writes need to have succeeded when we call this. device.Close(); } } @@ -95,7 +105,7 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestin } int startTier = FindClosestDeviceContaining((long)alignedDestinationAddress); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. 
- Debug.Assert(startTier >= commitPoint, "Write should not elide the commit point"); + Debug.Assert(startTier <= commitPoint, "Write should not elide the commit point"); for (int i = startTier; i < devices.Count; i++) { if (i == commitPoint) @@ -106,6 +116,7 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestin else { // Otherwise, simply issue the write without caring about callbacks + // TODO(Tianyu): We may need some type of count down to verify that all writes are finished before closing a device. devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => { }, null); } } @@ -149,12 +160,14 @@ private static string ComputeFileString(IList devices, int commitPoint) private int FindClosestDeviceContaining(long address) { - // TODO(Tianyu): Will linear search be faster for small number of tiers (which would be the common case)? - // binary search where the array is sorted in reverse order to the default ulong comparator - int tier = Array.BinarySearch(tierStartAddresses, 0, tierStartAddresses.Length, address, Comparer.Create((x, y) => y.CompareTo(x))); - // Binary search returns either the index or bitwise complement of the index of the first element smaller than start address. - // We want the first element with start address smaller than given address. - return tier >= 0 ? ++tier : ~tier; + // Can use binary search, but 1) it might not be faster than linear on a array assumed small, and 2) C# built in does not guarantee first element is returned on duplicates. + // Therefore we are sticking to the simpler approach at first. + for (int i = 0; i < devices.Count; i++) + { + if (tierStartAddresses[i] <= address) return i; + } + // TODO(Tianyu): This exception should never be triggered if we enforce that the last tier has unbounded storage. 
+ throw new ArgumentException("No such address exists"); } private void UpdateLogHead(long writeHead) diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index c819ce083..b8b341c0d 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -95,5 +95,15 @@ public void PageBlobWriteRead() { TestDeviceWriteRead(Devices.CreateAzurePageBlobDevice("BasicDiskFASTERTests", deleteOnClose: false)); } + + [Test] + public void TieredWriteRead() + { + // TODO(Tianyu): Magic constant + IDevice localDevice = Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\BasicDiskFASTERTests.log", deleteOnClose: true, capacity : 1 << 30); + IDevice cloudDevice = Devices.CreateAzurePageBlobDevice("BasicDiskFASTERTests", deleteOnClose: false); + var device = new TieredStorageDevice(1, localDevice, cloudDevice); + TestDeviceWriteRead(device); + } } } From 17a577f3d085b86fb1a8e74c51a2be23a49e55bb Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 9 Jul 2019 11:39:30 -0700 Subject: [PATCH 24/56] Wait for all previous tiers to complete write before invoking callback in commit point for write request. 
--- cs/src/core/Device/AzurePageBlobDevice.cs | 2 +- cs/src/core/Device/Devices.cs | 6 +- cs/src/core/Device/LocalStorageDevice.cs | 2 +- .../core/Device/ManagedLocalStorageDevice.cs | 2 +- cs/src/core/Device/NullDevice.cs | 2 +- cs/src/core/Device/StorageDeviceBase.cs | 5 -- cs/src/core/Device/TieredStorageDevice.cs | 55 +++++++++++++++---- 7 files changed, 52 insertions(+), 22 deletions(-) diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 0832e5379..0a3b10330 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -37,7 +37,7 @@ public class AzurePageBlobDevice : StorageDeviceBase /// The container is not deleted even if it was created in this constructor /// /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false, long capacity = Devices.CAPACITY_UNSPECIFIED) : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE, capacity) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 90f964b8f..355df9d8c 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -13,8 +13,10 @@ namespace FASTER.core /// public static class Devices { - // TODO(Tianyu): Should I just move the constant from storage device base here? - public const long CAPACITY_UNSPECIFIED = StorageDeviceBase.CAPACITY_UNSPECIFIED; + /// + /// This value is supplied for capacity when the device does not have a specified limit. 
+ /// + public const long CAPACITY_UNSPECIFIED = -1; private const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; private const string TEST_CONTAINER = "test"; diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 664ac1524..36c0a140f 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -27,7 +27,7 @@ public class LocalStorageDevice : StorageDeviceBase /// /// /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) + public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = Devices.CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), capacity) { Native32.EnableProcessPrivileges(); diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index bf0ffc667..9d778e20c 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -28,7 +28,7 @@ public class ManagedLocalStorageDevice : StorageDeviceBase /// /// /// The maximal number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) + public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = Devices.CAPACITY_UNSPECIFIED) : base(filename, GetSectorSize(filename), capacity) { pool = new SectorAlignedBufferPool(1, 1); diff --git a/cs/src/core/Device/NullDevice.cs b/cs/src/core/Device/NullDevice.cs index 1cab439af..f861af048 100644 --- a/cs/src/core/Device/NullDevice.cs +++ 
b/cs/src/core/Device/NullDevice.cs @@ -14,7 +14,7 @@ public class NullDevice : StorageDeviceBase /// /// /// - public NullDevice() : base("null", 512, CAPACITY_UNSPECIFIED) + public NullDevice() : base("null", 512, Devices.CAPACITY_UNSPECIFIED) { } diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 8c696a1b8..3e59ad1c5 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -19,11 +19,6 @@ namespace FASTER.core public abstract class StorageDeviceBase : IDevice { - /// - /// This value is supplied for capacity when the device does not have a specified limit. - /// - public const long CAPACITY_UNSPECIFIED = -1; - /// /// /// diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index dcb7d85b7..b51c299fb 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -3,6 +3,7 @@ using System.Text; using System.Diagnostics; using System.Threading; +using System.ComponentModel; namespace FASTER.core { @@ -18,16 +19,19 @@ class TieredStorageDevice : StorageDeviceBase // mark of the addresses seen in the WriteAsyncMethod. private long logHead; - // TODO(Tianyu): So far, I don't believe sector size is used anywhere in the code. Therefore I am not reasoning about what the - // sector size of a tiered storage should be when different tiers can have different sector sizes. + // TODO(Tianyu): Not reasoning about what the sector size of a tiered storage should be when different tiers can have different sector sizes. /// /// Constructs a new TieredStorageDevice composed of the given devices. /// + /// + /// The index of an IDevice in . When a write has been completed on the device, + /// the write is considered persistent. It is guaranteed that the callback in + /// will not be called until the write is completed on the commit point device. + /// /// /// List of devices to be used. 
The list should be given in order of hot to cold. Read is served from the /// device with smallest index in the list that has the requested data /// - /// // TODO(Tianyu): Recovering from a tiered device is potentially difficult, because we also need to recover their respective ranges. public TieredStorageDevice(int commitPoint, IList devices) : base(ComputeFileString(devices, commitPoint), 512, ComputeCapacity(devices)) { @@ -41,10 +45,24 @@ public TieredStorageDevice(int commitPoint, IList devices) : base(Compu logHead = 0; } + /// + /// Constructs a new TieredStorageDevice composed of the given devices. + /// + /// + /// The index of an IDevice in devices. When a write has been completed on the device, + /// the write is considered persistent. It is guaranteed that the callback in + /// will not be called until the write is completed on the commit point device. + /// + /// + /// List of devices to be used. The list should be given in order of hot to cold. Read is served from the + /// device with smallest index in the list that has the requested data + /// public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(commitPoint, (IList)devices) { } + + // TODO(Tianyu): Unclear whether this is the right design. Should we allow different tiers different segment sizes? public override void Initialize(long segmentSize) { foreach (IDevice devices in devices) @@ -106,17 +124,29 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestin int startTier = FindClosestDeviceContaining((long)alignedDestinationAddress); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. 
Debug.Assert(startTier <= commitPoint, "Write should not elide the commit point"); + + var countdown = new CountdownEvent(commitPoint + 1); // number of devices to wait on + // Issue writes to all tiers in parallel for (int i = startTier; i < devices.Count; i++) { - if (i == commitPoint) + if (i <= commitPoint) { - // Only if the write is complete on the commit point should we invoke the call back. - devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, callback, asyncResult); + // All tiers before the commit point (incluisive) need to be persistent before the callback is invoked. + devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => + { + // The last tier to finish invokes the callback + if (countdown.Signal()) + { + callback(e, n, o); + } + }, asyncResult); } else { - // Otherwise, simply issue the write without caring about callbacks // TODO(Tianyu): We may need some type of count down to verify that all writes are finished before closing a device. + // Some device may already provide said guarantee, however. + + // Otherwise, simply issue the write without caring about callbacks devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => { }, null); } } @@ -135,11 +165,11 @@ private static long ComputeCapacity(IList devices) foreach (IDevice device in devices) { // Unless the last tier device has unspecified storage capacity, in which case the tiered storage also has unspecified capacity - if (device.Capacity == CAPACITY_UNSPECIFIED) + if (device.Capacity == Devices.CAPACITY_UNSPECIFIED) { // TODO(Tianyu): Is this assumption too strong? 
Debug.Assert(device == devices[devices.Count - 1], "Only the last tier storage of a tiered storage device can have unspecified capacity"); - return CAPACITY_UNSPECIFIED; + return Devices.CAPACITY_UNSPECIFIED; } result += device.Capacity; } @@ -152,7 +182,9 @@ private static string ComputeFileString(IList devices, int commitPoint) StringBuilder result = new StringBuilder(); foreach (IDevice device in devices) { - result.AppendFormat("{0}, file name {1}, capacity {2} bytes;", device.GetType().Name, device.FileName, device.Capacity == CAPACITY_UNSPECIFIED ? "unspecified" : device.Capacity.ToString()); + string formatString = "{0}, file name {1}, capacity {2} bytes;"; + string capacity = device.Capacity == Devices.CAPACITY_UNSPECIFIED ? "unspecified" : device.Capacity.ToString(); + result.AppendFormat(formatString, device.GetType().Name, device.FileName, capacity); } result.AppendFormat("commit point: {0} at tier {1}", devices[commitPoint].GetType().Name, commitPoint); return result.ToString(); @@ -184,13 +216,14 @@ private void UpdateDeviceRange(int tier, long writeHead) { IDevice device = devices[tier]; // Never need to update range if storage is unbounded - if (device.Capacity == CAPACITY_UNSPECIFIED) return; + if (device.Capacity == Devices.CAPACITY_UNSPECIFIED) return; long oldLogTail = tierStartAddresses[tier]; if (writeHead - oldLogTail > device.Capacity) { long newLogTail = writeHead - oldLogTail - device.Capacity; tierStartAddresses[tier] = newLogTail; + // TODO(Tianyu): This should also be made async. // TODO(Tianyu): There will be a race here with readers. Epoch protection? 
device.DeleteAddressRange(oldLogTail, newLogTail); } From e4ca28446eeece774e0a8006432f659825d53d6f Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 10 Jul 2019 13:19:00 -0700 Subject: [PATCH 25/56] Integrate epoch protection, refactor for uniform segment sizes --- cs/src/core/Allocator/AllocatorBase.cs | 4 +- cs/src/core/Device/AzurePageBlobDevice.cs | 13 ++- cs/src/core/Device/IDevice.cs | 13 ++- cs/src/core/Device/LocalStorageDevice.cs | 4 + .../core/Device/ManagedLocalStorageDevice.cs | 5 + cs/src/core/Device/NullDevice.cs | 5 + cs/src/core/Device/StorageDeviceBase.cs | 38 +++++- cs/src/core/Device/TieredStorageDevice.cs | 108 ++++++++---------- 8 files changed, 118 insertions(+), 72 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 558ac6f52..48f53fb39 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -486,8 +486,8 @@ public AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer else this.epoch = epoch; - settings.LogDevice.Initialize(1L << settings.SegmentSizeBits); - settings.ObjectLogDevice?.Initialize(1L << settings.SegmentSizeBits); + settings.LogDevice.Initialize(1L << settings.SegmentSizeBits, epoch); + settings.ObjectLogDevice?.Initialize(1L << settings.SegmentSizeBits, epoch); // Page size LogPageSizeBits = settings.PageSizeBits; diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 0a3b10330..005c98697 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -66,19 +66,22 @@ public override void Close() } } } - /// - /// Inherited - /// - public override void DeleteSegmentRange(int fromSegment, int toSegment) + + public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) { + CountdownEvent countdown = new CountdownEvent(toSegment - fromSegment); for (int i = 
fromSegment; i < toSegment; i++) { if (blobs.TryRemove(i, out CloudPageBlob blob)) { - blob.Delete(); + blob.BeginDelete(r => + { + if (countdown.Signal()) callback(asyncResult); + }, asyncResult); } } } + /// /// Inherited /// diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index 72edb58f8..5febb331d 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -28,10 +28,17 @@ public interface IDevice long Capacity { get; } /// - /// Initialize device + /// Initialize device. This function is used to pass optional information that may only be known after + /// FASTER initialization (whose constructor takes in IDevice upfront). Implementation are free to ignore + /// information if it does not need the supplied information. + /// + /// This is a bit of a hack. /// /// - void Initialize(long segmentSize); + /// + /// The instance of the epoch protection framework to use, if needed + /// + void Initialize(long segmentSize, LightEpoch epoch = null); /* Segmented addressing API */ @@ -57,6 +64,8 @@ public interface IDevice /// void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult); + void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult); + /// /// Delete segment range /// diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 36c0a140f..cf0f48d0b 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -127,6 +127,10 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, } } + public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + { + UseSynchronousDeleteSegmentRangeForAsync(fromSegment, toSegment, callback, asyncResult); + } /// /// diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs 
b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 9d778e20c..3b46c36f2 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -156,6 +156,11 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) } } + public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + { + UseSynchronousDeleteSegmentRangeForAsync(fromSegment, toSegment, callback, asyncResult); + } + /// /// /// diff --git a/cs/src/core/Device/NullDevice.cs b/cs/src/core/Device/NullDevice.cs index f861af048..f61273360 100644 --- a/cs/src/core/Device/NullDevice.cs +++ b/cs/src/core/Device/NullDevice.cs @@ -70,6 +70,11 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) { } + public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + { + UseSynchronousDeleteSegmentRangeForAsync(fromSegment, toSegment, callback, asyncResult); + } + /// /// /// diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 3e59ad1c5..080afa677 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -43,6 +43,9 @@ public abstract class StorageDeviceBase : IDevice private int segmentSizeBits; private ulong segmentSizeMask; + // A device may have internal in-memory data structure that requires epoch protection under concurrent access. 
+ protected LightEpoch epoch; + /// /// Initializes a new StorageDeviceBase /// @@ -65,12 +68,13 @@ public StorageDeviceBase(string filename, uint sectorSize, long capacity) /// Initialize device /// /// - public virtual void Initialize(long segmentSize) + public virtual void Initialize(long segmentSize, LightEpoch epoch = null) { // TODO(Tianyu): Alternatively, we can adjust capacity based on the segment size: given a phsyical upper limit of capacity, // we only make use of (Capacity / segmentSize * segmentSize) many bytes. Debug.Assert(Capacity == -1 || Capacity % segmentSize == 0, "capacity must be a multiple of segment sizes"); this.segmentSize = segmentSize; + this.epoch = epoch; if (!Utility.IsPowerOfTwo(segmentSize)) { if (segmentSize != -1) @@ -93,7 +97,7 @@ public virtual void Initialize(long segmentSize) /// /// /// - public virtual void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + public void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { var segment = segmentSizeBits < 64 ? alignedDestinationAddress >> segmentSizeBits : 0; WriteAsync( @@ -111,7 +115,7 @@ public virtual void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestina /// /// /// - public virtual void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult) + public void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult) { var segment = segmentSizeBits < 64 ? 
alignedSourceAddress >> segmentSizeBits : 0; @@ -127,13 +131,18 @@ public virtual void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinat /// /// /// - public virtual void DeleteAddressRange(long fromAddress, long toAddress) + public void DeleteAddressRange(long fromAddress, long toAddress) { var fromSegment = segmentSizeBits < 64 ? fromAddress >> segmentSizeBits : 0; var toSegment = segmentSizeBits < 64 ? toAddress >> segmentSizeBits : 0; DeleteSegmentRange((int)fromSegment, (int)toSegment); } + private bool AlignedAtSegmentBoundary(long address) + { + return ((long)segmentSizeMask & address) == 0; + } + /// /// /// @@ -161,7 +170,26 @@ public virtual void DeleteAddressRange(long fromAddress, long toAddress) /// /// /// - public abstract void DeleteSegmentRange(int fromSegment, int toSegment); + public virtual unsafe void DeleteSegmentRange(int fromSegment, int toSegment) + { + ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); + DeleteSegmentRangeAsync(fromSegment, toSegment, r => + { + completionEvent.Set(); + }, null); + completionEvent.Wait(); + } + + public abstract void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult); + + + protected void UseSynchronousDeleteSegmentRangeForAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + { + DeleteSegmentRange(fromSegment, toSegment); + // TODO(Tianyu): There is apparently no setters on IAsyncResult. Should I just pass this or do I need to set some states? + // e.g. 
set CompletedSynchronously to true + callback(asyncResult); + } /// /// diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index b51c299fb..44a838043 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -14,10 +14,10 @@ class TieredStorageDevice : StorageDeviceBase // TODO(Tianyu): For some retarded reason Interlocked provides no CompareExchange for unsigned primitives. // Because it is assumed that tiers are inclusive with one another, we only need to store the starting address of the log portion avialable on each tier. // That implies this list is sorted in descending order with the last tier being 0 always. - private readonly long[] tierStartAddresses; + private readonly int[] tierStartSegment; // Because the device has no access to in-memory log tail information, we need to keep track of that ourselves. Currently this is done by keeping a high-water - // mark of the addresses seen in the WriteAsyncMethod. - private long logHead; + // mark of the segment id seen in the WriteAsyncMethod. + private int logTail; // TODO(Tianyu): Not reasoning about what the sector size of a tiered storage should be when different tiers can have different sector sizes. /// @@ -39,10 +39,10 @@ public TieredStorageDevice(int commitPoint, IList devices) : base(Compu // TODO(Tianyu): Should assert that passed in devices are not yet initialized. This is more challenging for recovering. this.devices = devices; this.commitPoint = commitPoint; - tierStartAddresses = (long[])Array.CreateInstance(typeof(long), devices.Count); - tierStartAddresses.Initialize(); + tierStartSegment = (int[])Array.CreateInstance(typeof(int), devices.Count); + tierStartSegment.Initialize(); // TODO(Tianyu): Change after figuring out how to deal with recovery. 
- logHead = 0; + logTail = 0; } /// @@ -50,8 +50,8 @@ public TieredStorageDevice(int commitPoint, IList devices) : base(Compu /// /// /// The index of an IDevice in devices. When a write has been completed on the device, - /// the write is considered persistent. It is guaranteed that the callback in - /// will not be called until the write is completed on the commit point device. + /// the write is considered persistent. It is guaranteed that the callback in + /// will not be called until the write is completed on commit point device and all previous tiers. /// /// /// List of devices to be used. The list should be given in order of hot to cold. Read is served from the @@ -63,11 +63,14 @@ public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(com // TODO(Tianyu): Unclear whether this is the right design. Should we allow different tiers different segment sizes? - public override void Initialize(long segmentSize) + public override void Initialize(long segmentSize, LightEpoch epoch) { + Debug.Assert(epoch != null, "TieredStorage requires epoch protection to work correctly"); + base.Initialize(segmentSize, epoch); + foreach (IDevice devices in devices) { - devices.Initialize(segmentSize); + devices.Initialize(segmentSize, epoch); } } @@ -80,48 +83,44 @@ public override void Close() } } - public override void DeleteAddressRange(long fromAddress, long toAddress) + public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) { // TODO(Tianyu): concurrency - int fromStartTier = FindClosestDeviceContaining(fromAddress); - int toStartTier = FindClosestDeviceContaining(toAddress); + // TODO(Tianyu): It is probably fine to simply forward the call given how this API is being used. There is plenty of room + // for erroneous inputs here though. 
+ int fromStartTier = FindClosestDeviceContaining(fromSegment); + int toStartTier = FindClosestDeviceContaining(toSegment); + var countdown = new CountdownEvent(toStartTier - fromStartTier); // number of devices to wait on for (int i = fromStartTier; i < toStartTier; i++) { // Because our tiered storage is inclusive, - devices[i].DeleteAddressRange((long)Math.Max(fromAddress, tierStartAddresses[i]), toAddress); + devices[i].DeleteSegmentRangeAsync(Math.Max(fromSegment, tierStartSegment[i]), toSegment, r => + { + if (countdown.Signal()) callback(asyncResult); + }, null); } } - public override void DeleteSegmentRange(int fromSegment, int toSegment) - { - throw new NotSupportedException(); - } - public override void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint alignedReadLength, IOCompletionCallback callback, IAsyncResult asyncResulte) + public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to writes, which may trigger a change in start address. - IDevice closestDevice = devices[FindClosestDeviceContaining((long)alignedSourceAddress)]; + IDevice closestDevice = devices[FindClosestDeviceContaining(segmentId)]; + // TODO(Tianyu): I don't think there is a "grab-lock" step for the epoch protection framework here? // We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not // be present for certain devices. 
- closestDevice.ReadAsync(alignedSourceAddress, alignedDestinationAddress, alignedReadLength, callback, asyncResulte); + closestDevice.ReadAsync(segmentId, sourceAddress, destinationAddress, readLength, callback, asyncResult); } - public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) - { - // If it is not guaranteed that all underlying tiers agree on a segment size, this API cannot have a meaningful implementation - throw new NotSupportedException(); - } - - public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - long writeHead = (long)alignedDestinationAddress + numBytesToWrite; // TODO(Tianyu): Think more carefully about how this can interleave. - UpdateLogHead(writeHead); + UpdateLogTail(segmentId); for (int i = 0; i < devices.Count; i++) { - UpdateDeviceRange(i, writeHead); + UpdateDeviceRange(i, segmentId); } - int startTier = FindClosestDeviceContaining((long)alignedDestinationAddress); + int startTier = FindClosestDeviceContaining(segmentId); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. Debug.Assert(startTier <= commitPoint, "Write should not elide the commit point"); @@ -131,14 +130,12 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestin { if (i <= commitPoint) { + // All tiers before the commit point (incluisive) need to be persistent before the callback is invoked. 
- devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => + devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) => { // The last tier to finish invokes the callback - if (countdown.Signal()) - { - callback(e, n, o); - } + if (countdown.Signal()) callback(e, n, o); }, asyncResult); } else @@ -147,17 +144,11 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, ulong alignedDestin // Some device may already provide said guarantee, however. // Otherwise, simply issue the write without caring about callbacks - devices[i].WriteAsync(sourceAddress, alignedDestinationAddress, numBytesToWrite, (e, n, o) => { }, null); + devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) => { }, null); } } } - public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) - { - // If it is not guaranteed that all underlying tiers agree on a segment size, this API cannot have a meaningful implementation - throw new NotSupportedException(); - } - private static long ComputeCapacity(IList devices) { long result = 0; @@ -196,36 +187,37 @@ private int FindClosestDeviceContaining(long address) // Therefore we are sticking to the simpler approach at first. for (int i = 0; i < devices.Count; i++) { - if (tierStartAddresses[i] <= address) return i; + if (tierStartSegment[i] <= address) return i; } // TODO(Tianyu): This exception should never be triggered if we enforce that the last tier has unbounded storage. 
throw new ArgumentException("No such address exists"); } - private void UpdateLogHead(long writeHead) + private void UpdateLogTail(int writeTail) { - long logHeadLocal; + int logTailLocal; do { - logHeadLocal = logHead; - if (logHeadLocal >= writeHead) return; - } while (logHeadLocal != Interlocked.CompareExchange(ref logHead, writeHead, logHeadLocal)); + logTailLocal = logTail; + if (logTailLocal >= writeTail) return; + } while (logTailLocal != Interlocked.CompareExchange(ref logTail, writeTail, logTailLocal)); } - private void UpdateDeviceRange(int tier, long writeHead) + private void UpdateDeviceRange(int tier, int writeTail) { IDevice device = devices[tier]; // Never need to update range if storage is unbounded if (device.Capacity == Devices.CAPACITY_UNSPECIFIED) return; - long oldLogTail = tierStartAddresses[tier]; - if (writeHead - oldLogTail > device.Capacity) + int oldStartSegment = tierStartSegment[tier]; + if ((writeTail - oldStartSegment) * segmentSize > device.Capacity) { - long newLogTail = writeHead - oldLogTail - device.Capacity; - tierStartAddresses[tier] = newLogTail; - // TODO(Tianyu): This should also be made async. - // TODO(Tianyu): There will be a race here with readers. Epoch protection? - device.DeleteAddressRange(oldLogTail, newLogTail); + int newStartSegment = writeTail - oldStartSegment - (int)(device.Capacity / segmentSize); + tierStartSegment[tier] = newStartSegment; + // We are assuming that the capacity given to a storage tier is not the physical capacity of the underlying device --- there will always be enough space to + // write extra segments while deletes are underway. If this assumption is not true, we will need to perform any writes in the callback of the delete. + // This action needs to be epoch-protected because readers may be issuing reads to the deleted segment, unaware of the delete. 
+ epoch.BumpCurrentEpoch(() => device.DeleteSegmentRangeAsync(oldStartSegment, newStartSegment, r => { }, null)); } } } From 0a8742202e8aac7e4aa6541c78be502c02f4ae54 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 10 Jul 2019 14:23:08 -0700 Subject: [PATCH 26/56] Pick out the MonotonicUpdate function into utility --- cs/src/core/Allocator/AllocatorBase.cs | 51 +++++++------------------- cs/src/core/Utilities/Utility.cs | 34 +++++++++++++++++ 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 48f53fb39..557cddea3 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -870,7 +870,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) tailAddress = GetTailAddress(); long localTailAddress = tailAddress; long currentReadOnlyOffset = ReadOnlyAddress; - if (MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress, false)); } @@ -882,7 +882,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) /// public bool ShiftReadOnlyAddress(long newReadOnlyAddress) { - if (MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(newReadOnlyAddress, false)); return true; @@ -897,19 +897,19 @@ public bool ShiftReadOnlyAddress(long newReadOnlyAddress) public void ShiftBeginAddress(long newBeginAddress) { // First update the begin address - MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); + Utility.MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); // Then the head address - var h = MonotonicUpdate(ref HeadAddress, 
newBeginAddress, out long old); + var h = Utility.MonotonicUpdate(ref HeadAddress, newBeginAddress, out long old); // Finally the read-only address - var r = MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); + var r = Utility.MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); // Clean up until begin address epoch.BumpCurrentEpoch(() => { if (r) { - MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); - MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); + Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); + Utility.MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); } if (h) OnPagesClosed(newBeginAddress); @@ -935,7 +935,7 @@ protected virtual void DeleteAddressRange(long fromAddress, long toAddress) /// public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendingFlushComplete = false) { - if (MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) + if (Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) { Debug.WriteLine("SafeReadOnly shifted from {0:X} to {1:X}", oldSafeReadOnlyAddress, newSafeReadOnlyAddress); long startPage = oldSafeReadOnlyAddress >> LogPageSizeBits; @@ -964,7 +964,7 @@ public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendi /// public void OnPagesClosed(long newSafeHeadAddress) { - if (MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) + if (Utility.MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) { Debug.WriteLine("SafeHeadOffset shifted from {0:X} to {1:X}", oldSafeHeadAddress, newSafeHeadAddress); @@ -1020,7 +1020,7 @@ private void PageAlignedShiftReadOnlyAddress(long currentTailAddress) long currentReadOnlyAddress = ReadOnlyAddress; long pageAlignedTailAddress = currentTailAddress & ~PageSizeMask; long 
desiredReadOnlyAddress = (pageAlignedTailAddress - ReadOnlyLagAddress); - if (MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) { Debug.WriteLine("Allocate: Moving read-only offset from {0:X} to {1:X}", oldReadOnlyAddress, desiredReadOnlyAddress); epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(desiredReadOnlyAddress)); @@ -1050,7 +1050,7 @@ private void PageAlignedShiftHeadAddress(long currentTailAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1075,7 +1075,7 @@ public long ShiftHeadAddress(long desiredHeadAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1104,35 +1104,10 @@ protected void ShiftFlushedUntilAddress() if (update) { - MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); + Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); } } - - - /// - /// Used by several functions to update the variable to newValue. Ignores if newValue is smaller or - /// than the current value. 
- /// - /// - /// - /// - /// - private bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) - { - oldValue = variable; - while (oldValue < newValue) - { - var foundValue = Interlocked.CompareExchange(ref variable, newValue, oldValue); - if (foundValue == oldValue) - { - return true; - } - oldValue = foundValue; - } - return false; - } - /// /// Reset for recovery /// diff --git a/cs/src/core/Utilities/Utility.cs b/cs/src/core/Utilities/Utility.cs index 57ab7168f..375b20cfc 100644 --- a/cs/src/core/Utilities/Utility.cs +++ b/cs/src/core/Utilities/Utility.cs @@ -223,5 +223,39 @@ internal static int Murmur3(int h) a ^= a >> 16; return (int)a; } + + /// + /// Updates the variable to newValue only if the current value is smaller than the new value. + /// + /// The variable to possibly replace + /// The value that replaces the variable if successful + /// The orignal value in the variable + /// if oldValue less than newValue + public static bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) + { + do + { + oldValue = variable; + if (oldValue > newValue) return false; + } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); + return true; + } + /// + /// Updates the variable to newValue only if the current value is smaller than the new value. 
+ /// + /// The variable to possibly replace + /// The value that replaces the variable if successful + /// The orignal value in the variable + /// if oldValue less than newValue + public static bool MonotonicUpdate(ref int variable, int newValue, out int oldValue) + { + do + { + oldValue = variable; + if (oldValue > newValue) return false; + } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); + return true; + } + } } From 48b818272f8fb030a024f4291814cdb4a2876e58 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Thu, 11 Jul 2019 10:23:07 -0700 Subject: [PATCH 27/56] Handle contention on tier start addresses and read-delete --- cs/src/core/Device/TieredStorageDevice.cs | 93 ++++++++++++++--------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 44a838043..789b1ffa4 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -13,7 +13,7 @@ class TieredStorageDevice : StorageDeviceBase private readonly int commitPoint; // TODO(Tianyu): For some retarded reason Interlocked provides no CompareExchange for unsigned primitives. // Because it is assumed that tiers are inclusive with one another, we only need to store the starting address of the log portion avialable on each tier. - // That implies this list is sorted in descending order with the last tier being 0 always. + // That implies this list is sorted in descending order with the last tier being the head of the log always. private readonly int[] tierStartSegment; // Because the device has no access to in-memory log tail information, we need to keep track of that ourselves. Currently this is done by keeping a high-water // mark of the segment id seen in the WriteAsyncMethod. @@ -78,35 +78,47 @@ public override void Close() { foreach (IDevice device in devices) { - // TODO(Tianyu): All writes need to have succeeded when we call this. 
device.Close(); } } public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) { - // TODO(Tianyu): concurrency - // TODO(Tianyu): It is probably fine to simply forward the call given how this API is being used. There is plenty of room - // for erroneous inputs here though. - int fromStartTier = FindClosestDeviceContaining(fromSegment); + // Compute the tiers that we need to call delete on. This value may be stale due to concurrent calls to WriteAsync, which may + // evict segments from a device. This is only used as a starting point for the delete. int toStartTier = FindClosestDeviceContaining(toSegment); - var countdown = new CountdownEvent(toStartTier - fromStartTier); // number of devices to wait on - for (int i = fromStartTier; i < toStartTier; i++) + + + // Delete callback should not be invoked until all deletes are completed + var countdown = new CountdownEvent(devices.Count - toStartTier); + + // This is assuming that there are enough physical space left on the device to accomodate more data than the specified capacity --- + // concurrent writes may happen before deletes are completed + for (int i = toStartTier; i < devices.Count; i++) { - // Because our tiered storage is inclusive, - devices[i].DeleteSegmentRangeAsync(Math.Max(fromSegment, tierStartSegment[i]), toSegment, r => + // Attempt to monotonically update the range stored by the tier before calling delete. If monotonic update fails, + // all deletes that needed to be invoked are already called by other threads, so skip this tier. + if (!Utility.MonotonicUpdate(ref tierStartSegment[i], toSegment, out int oldValue)) continue; + + // Otherwise, this function has atomically removed range [oldValue, toSegment). The segments in range [fromSegment, oldValue) are + // deleted by other concurrent threads, so we should not invoke delete on those. 
When calling delete, we use epoch protection to make + // sure no active readers are accessing the deleted segments before invoking delete. + // TODO(Tianyu): Is this too wasteful in terms of checking out epochs? + epoch.BumpCurrentEpoch(() => { - if (countdown.Signal()) callback(asyncResult); - }, null); + devices[i].DeleteSegmentRangeAsync(Math.Max(fromSegment, oldValue), toSegment, r => + { + if (countdown.Signal()) callback(asyncResult); + }, null); + }); } } public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { - // TODO(Tianyu): This whole operation needs to be thread-safe with concurrent calls to writes, which may trigger a change in start address. + // This device is epoch-protected and cannot be stale while the operation is in flight IDevice closestDevice = devices[FindClosestDeviceContaining(segmentId)]; - // TODO(Tianyu): I don't think there is a "grab-lock" step for the epoch protection framework here? // We can directly forward the address, because assuming an inclusive policy, all devices agree on the same address space. The only difference is that some segments may not // be present for certain devices. closestDevice.ReadAsync(segmentId, sourceAddress, destinationAddress, readLength, callback, asyncResult); @@ -114,12 +126,17 @@ public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destin public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - // TODO(Tianyu): Think more carefully about how this can interleave. 
- UpdateLogTail(segmentId); - for (int i = 0; i < devices.Count; i++) + // Update the tail if the segment we are writing to is larger than the previous maximum + if (Utility.MonotonicUpdate(ref logTail, segmentId, out int originalTail)) { - UpdateDeviceRange(i, segmentId); + // If indeed we are writing a new segment, some devices may be out of space and require eviction. + for (int i = 0; i < devices.Count; i++) + { + // Instead of updating range using segmentId + UpdateDeviceRangeOnNewSegment(i); + } } + int startTier = FindClosestDeviceContaining(segmentId); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. Debug.Assert(startTier <= commitPoint, "Write should not elide the commit point"); @@ -193,32 +210,32 @@ private int FindClosestDeviceContaining(long address) throw new ArgumentException("No such address exists"); } - private void UpdateLogTail(int writeTail) - { - int logTailLocal; - do - { - logTailLocal = logTail; - if (logTailLocal >= writeTail) return; - } while (logTailLocal != Interlocked.CompareExchange(ref logTail, writeTail, logTailLocal)); - } - - private void UpdateDeviceRange(int tier, int writeTail) + private void UpdateDeviceRangeOnNewSegment(int tier) { IDevice device = devices[tier]; // Never need to update range if storage is unbounded if (device.Capacity == Devices.CAPACITY_UNSPECIFIED) return; - int oldStartSegment = tierStartSegment[tier]; - if ((writeTail - oldStartSegment) * segmentSize > device.Capacity) + // Attempt to update the stored range until there are enough space on the tier to accomodate the current logTail + int oldStartSegment, currentTail, newStartSegment; + do { - int newStartSegment = writeTail - oldStartSegment - (int)(device.Capacity / segmentSize); - tierStartSegment[tier] = newStartSegment; - // We are assuming that the capacity given to a storage tier is not the physical capacity of the underlying device --- 
there will always be enough space to - // write extra segments while deletes are underway. If this assumption is not true, we will need to perform any writes in the callback of the delete. - // This action needs to be epoch-protected because readers may be issuing reads to the deleted segment, unaware of the delete. - epoch.BumpCurrentEpoch(() => device.DeleteSegmentRangeAsync(oldStartSegment, newStartSegment, r => { }, null)); - } + oldStartSegment = tierStartSegment[tier]; + currentTail = logTail; + // No need to update if still within capacity; + if ((currentTail - oldStartSegment) * segmentSize <= device.Capacity) return; + // TODO(Tianyu): Can probably use a bit shift instead, but that is private on the base + newStartSegment = currentTail - (int)(device.Capacity / segmentSize); + + } while (Interlocked.CompareExchange(ref tierStartSegment[tier], newStartSegment, oldStartSegment) != oldStartSegment); + + // This action needs to be epoch-protected because readers may be issuing reads to the deleted segment, unaware of the delete. + // Because of earlier compare-and-swap, the caller has exclusive access to the range [oldStartSegment, newStartSegment), and there will + // be no double deletes. + epoch.BumpCurrentEpoch(() => device.DeleteSegmentRangeAsync(oldStartSegment, newStartSegment, r => { }, null)); + // We are assuming that the capacity given to a storage tier is not the physical capacity of the underlying device --- there will always be enough space to + // write extra segments while deletes are underway. If this assumption is not true, we will need to perform any writes in the callback of the delete. } + } } From 25f3e7a3ea5b9f3dce24f821bd0a786987f00284 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 12 Jul 2019 10:41:07 -0700 Subject: [PATCH 28/56] Refactor IDevice to have a truncate interface instead of deletes. 
Move logic to track segment ranges from TieredStorageDevice down to StorageDeviceBase --- cs/src/core/Allocator/AllocatorBase.cs | 11 +- cs/src/core/Allocator/BlittableAllocator.cs | 5 - cs/src/core/Allocator/GenericAllocator.cs | 6 +- .../Allocator/VarLenBlittableAllocator.cs | 5 - cs/src/core/Device/AzurePageBlobDevice.cs | 14 +-- cs/src/core/Device/IDevice.cs | 32 +++--- cs/src/core/Device/LocalStorageDevice.cs | 29 +++-- .../core/Device/ManagedLocalStorageDevice.cs | 21 ++-- cs/src/core/Device/NullDevice.cs | 13 +-- cs/src/core/Device/StorageDeviceBase.cs | 105 +++++++++++------- cs/src/core/Device/TieredStorageDevice.cs | 104 +++-------------- cs/src/core/Utilities/Utility.cs | 4 +- 12 files changed, 133 insertions(+), 216 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 557cddea3..1907614e7 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -913,18 +913,13 @@ public void ShiftBeginAddress(long newBeginAddress) } if (h) OnPagesClosed(newBeginAddress); - DeleteAddressRange(oldBeginAddress, newBeginAddress); + TruncateUntilAddress(newBeginAddress); }); } - /// - /// Delete address range - /// - /// - /// - protected virtual void DeleteAddressRange(long fromAddress, long toAddress) + protected virtual void TruncateUntilAddress(long toAddress) { - device.DeleteAddressRange(fromAddress, toAddress); + device.TruncateUntilAddress(toAddress); } /// diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index 85736465b..8f119a38e 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -153,11 +153,6 @@ protected override bool IsAllocated(int pageIndex) return values[pageIndex] != null; } - protected override void DeleteAddressRange(long fromAddress, long toAddress) - { - base.DeleteAddressRange(fromAddress, toAddress); - } - protected override void 
WriteAsync(long flushPage, IOCompletionCallback callback, PageAsyncFlushResult asyncResult) { WriteAsync((IntPtr)pointers[flushPage % BufferSize], diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 3e8ac26c0..d3970a981 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -226,10 +226,10 @@ protected override bool IsAllocated(int pageIndex) return values[pageIndex] != null; } - protected override void DeleteAddressRange(long fromAddress, long toAddress) + protected override void TruncateUntilAddress(long toAddress) { - base.DeleteAddressRange(fromAddress, toAddress); - objectLogDevice.DeleteSegmentRange((int)(fromAddress >> LogSegmentSizeBits), (int)(toAddress >> LogSegmentSizeBits)); + base.TruncateUntilAddress(toAddress); + objectLogDevice.TruncateUntilAddress(toAddress); } protected override void WriteAsync(long flushPage, IOCompletionCallback callback, PageAsyncFlushResult asyncResult) diff --git a/cs/src/core/Allocator/VarLenBlittableAllocator.cs b/cs/src/core/Allocator/VarLenBlittableAllocator.cs index 353409c27..42ed20e03 100644 --- a/cs/src/core/Allocator/VarLenBlittableAllocator.cs +++ b/cs/src/core/Allocator/VarLenBlittableAllocator.cs @@ -225,11 +225,6 @@ protected override bool IsAllocated(int pageIndex) return values[pageIndex] != null; } - protected override void DeleteAddressRange(long fromAddress, long toAddress) - { - base.DeleteAddressRange(fromAddress, toAddress); - } - protected override void WriteAsync(long flushPage, IOCompletionCallback callback, PageAsyncFlushResult asyncResult) { WriteAsync((IntPtr)pointers[flushPage % BufferSize], diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs index 005c98697..86984b3ef 100644 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ b/cs/src/core/Device/AzurePageBlobDevice.cs @@ -66,19 +66,11 @@ public override void Close() } } } - - public override void 
DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) { - CountdownEvent countdown = new CountdownEvent(toSegment - fromSegment); - for (int i = fromSegment; i < toSegment; i++) + if (blobs.TryRemove(segment, out CloudPageBlob blob)) { - if (blobs.TryRemove(i, out CloudPageBlob blob)) - { - blob.BeginDelete(r => - { - if (countdown.Signal()) callback(asyncResult); - }, asyncResult); - } + blob.BeginDelete(callback, result); } } diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index 5febb331d..5ea7c816e 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -27,6 +27,12 @@ public interface IDevice /// long Capacity { get; } + long SegmentSize { get; } + + int StartSegment { get; } + + int EndSegment { get; } + /// /// Initialize device. This function is used to pass optional information that may only be known after /// FASTER initialization (whose constructor takes in IDevice upfront). 
Implementation are free to ignore @@ -64,15 +70,6 @@ public interface IDevice /// void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult); - void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult); - - /// - /// Delete segment range - /// - /// - /// - void DeleteSegmentRange(int fromSegment, int toSegment); - /* Direct addressing API */ /// @@ -95,12 +92,17 @@ public interface IDevice /// void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult); - /// - /// Delete address range - /// - /// - /// - void DeleteAddressRange(long fromAddress, long toAddress); + void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result); + + void TruncateUntilAddress(long toAddress); + + void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callback, IAsyncResult result); + + void TruncateUntilSegment(int toSegment); + + void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result); + + void RemoveSegment(int segment); /* Close */ diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index ac7781aea..afd34a0db 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -117,28 +117,25 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, } } - public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) + public override void RemoveSegment(int segment) { - UseSynchronousDeleteSegmentRangeForAsync(fromSegment, toSegment, callback, asyncResult); + if (logHandles.TryRemove(segment, out SafeFileHandle logHandle)) + { + logHandle.Dispose(); + Native32.DeleteFileW(GetSegmentName(segment)); + } } - /// - /// - /// - /// - /// 
- public override void DeleteSegmentRange(int fromSegment, int toSegment) + public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) { - for (int i=fromSegment; i /// /// diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 3b46c36f2..2a2564fd4 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -139,26 +139,19 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, new WriteCallbackWrapper(callback, asyncResult, memory).Callback, null); } - /// - /// - /// - /// - /// - public override void DeleteSegmentRange(int fromSegment, int toSegment) + public override void RemoveSegment(int segment) { - for (int i=fromSegment; i diff --git a/cs/src/core/Device/NullDevice.cs b/cs/src/core/Device/NullDevice.cs index f61273360..30412d5ec 100644 --- a/cs/src/core/Device/NullDevice.cs +++ b/cs/src/core/Device/NullDevice.cs @@ -61,19 +61,12 @@ public override unsafe void WriteAsync(IntPtr alignedSourceAddress, int segmentI callback(0, numBytesToWrite, ov_native); } - /// - /// - /// - /// - /// - public override void DeleteSegmentRange(int fromSegment, int toSegment) + public override void RemoveSegment(int segment) { + // No-op } - public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) - { - UseSynchronousDeleteSegmentRangeForAsync(fromSegment, toSegment, callback, asyncResult); - } + public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) => callback(result); /// /// diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 080afa677..bc5b8484e 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -35,6 +35,12 @@ public abstract class StorageDeviceBase : IDevice /// public long Capacity { get; } + 
public int StartSegment { get { return startSegment; } } + + public int EndSegment { get { return endSegment; } } + + public long SegmentSize { get { return segmentSize; } } + /// /// Segment size /// @@ -46,6 +52,8 @@ public abstract class StorageDeviceBase : IDevice // A device may have internal in-memory data structure that requires epoch protection under concurrent access. protected LightEpoch epoch; + private int startSegment, endSegment; + /// /// Initializes a new StorageDeviceBase /// @@ -99,10 +107,19 @@ public virtual void Initialize(long segmentSize, LightEpoch epoch = null) /// public void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDestinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - var segment = segmentSizeBits < 64 ? alignedDestinationAddress >> segmentSizeBits : 0; + int segment = (int)(segmentSizeBits < 64 ? alignedDestinationAddress >> segmentSizeBits : 0); + + // If the device has bounded space, and we are writing a new segment, need to check whether an existing segment needs to be evicted. + if (Capacity != Devices.CAPACITY_UNSPECIFIED && Utility.MonotonicUpdate(ref endSegment, segment, out int oldEnd)) + { + // Attempt to update the stored range until there are enough space on the tier to accomodate the current logTail + int newStartSegment = endSegment - (int)(Capacity >> segmentSizeBits); + // Assuming that we still have enough physical capacity to write another segment, even if delete does not immediately free up space. 
+ TruncateUntilSegmentAsync(newStartSegment, r => { }, null); + } WriteAsync( alignedSourceAddress, - (int)segment, + segment, alignedDestinationAddress & segmentSizeMask, numBytesToWrite, callback, asyncResult); } @@ -126,21 +143,57 @@ public void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddre aligned_read_length, callback, asyncResult); } - /// - /// - /// - /// - /// - public void DeleteAddressRange(long fromAddress, long toAddress) + public abstract void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result); + + public virtual void RemoveSegment(int segment) + { + ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); + RemoveSegmentAsync(segment, r => completionEvent.Set(), null); + completionEvent.Wait(); + } + + public virtual void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callback, IAsyncResult result) + { + // Reset begin range to at least toAddress + if (!Utility.MonotonicUpdate(ref startSegment, toSegment, out int oldStart)) + { + // If no-op, invoke callback and return immediately + callback(result); + return; + } + CountdownEvent countdown = new CountdownEvent(toSegment - oldStart); + // This action needs to be epoch-protected because readers may be issuing reads to the deleted segment, unaware of the delete. + // Because of earlier compare-and-swap, the caller has exclusive access to the range [oldStartSegment, newStartSegment), and there will + // be no double deletes. + epoch.BumpCurrentEpoch(() => + { + for (int i = oldStart; i < toSegment; i++) + { + RemoveSegmentAsync(i, r => { + if (countdown.Signal()) callback(r); + }, result); + } + }); + } + + public virtual void TruncateUntilSegment(int toSegment) { - var fromSegment = segmentSizeBits < 64 ? fromAddress >> segmentSizeBits : 0; - var toSegment = segmentSizeBits < 64 ? 
toAddress >> segmentSizeBits : 0; - DeleteSegmentRange((int)fromSegment, (int)toSegment); + ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); + TruncateUntilSegmentAsync(toSegment, r => completionEvent.Set(), null); + completionEvent.Wait(); } - private bool AlignedAtSegmentBoundary(long address) + public virtual void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result) { - return ((long)segmentSizeMask & address) == 0; + // Truncate only up to segment boundary if address is not aligned + TruncateUntilSegmentAsync((int)toAddress >> segmentSizeBits, callback, result); + } + + public virtual void TruncateUntilAddress(long toAddress) + { + ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); + TruncateUntilAddressAsync(toAddress, r => completionEvent.Set(), null); + completionEvent.Wait(); } /// @@ -165,32 +218,6 @@ private bool AlignedAtSegmentBoundary(long address) /// public abstract void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult); - /// - /// - /// - /// - /// - public virtual unsafe void DeleteSegmentRange(int fromSegment, int toSegment) - { - ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); - DeleteSegmentRangeAsync(fromSegment, toSegment, r => - { - completionEvent.Set(); - }, null); - completionEvent.Wait(); - } - - public abstract void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult); - - - protected void UseSynchronousDeleteSegmentRangeForAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) - { - DeleteSegmentRange(fromSegment, toSegment); - // TODO(Tianyu): There is apparently no setters on IAsyncResult. Should I just pass this or do I need to set some states? - // e.g. 
set CompletedSynchronously to true - callback(asyncResult); - } - /// /// /// diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index 789b1ffa4..fceb66ff2 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Threading; using System.ComponentModel; +using System.Collections.Concurrent; namespace FASTER.core { @@ -11,13 +12,6 @@ class TieredStorageDevice : StorageDeviceBase { private readonly IList devices; private readonly int commitPoint; - // TODO(Tianyu): For some retarded reason Interlocked provides no CompareExchange for unsigned primitives. - // Because it is assumed that tiers are inclusive with one another, we only need to store the starting address of the log portion avialable on each tier. - // That implies this list is sorted in descending order with the last tier being the head of the log always. - private readonly int[] tierStartSegment; - // Because the device has no access to in-memory log tail information, we need to keep track of that ourselves. Currently this is done by keeping a high-water - // mark of the segment id seen in the WriteAsyncMethod. - private int logTail; // TODO(Tianyu): Not reasoning about what the sector size of a tiered storage should be when different tiers can have different sector sizes. /// @@ -39,10 +33,6 @@ public TieredStorageDevice(int commitPoint, IList devices) : base(Compu // TODO(Tianyu): Should assert that passed in devices are not yet initialized. This is more challenging for recovering. this.devices = devices; this.commitPoint = commitPoint; - tierStartSegment = (int[])Array.CreateInstance(typeof(int), devices.Count); - tierStartSegment.Initialize(); - // TODO(Tianyu): Change after figuring out how to deal with recovery. 
- logTail = 0; } /// @@ -65,7 +55,6 @@ public TieredStorageDevice(int commitPoint, params IDevice[] devices) : this(com // TODO(Tianyu): Unclear whether this is the right design. Should we allow different tiers different segment sizes? public override void Initialize(long segmentSize, LightEpoch epoch) { - Debug.Assert(epoch != null, "TieredStorage requires epoch protection to work correctly"); base.Initialize(segmentSize, epoch); foreach (IDevice devices in devices) @@ -82,39 +71,6 @@ public override void Close() } } - public override void DeleteSegmentRangeAsync(int fromSegment, int toSegment, AsyncCallback callback, IAsyncResult asyncResult) - { - // Compute the tiers that we need to call delete on. This value may be stale due to concurrent calls to WriteAsync, which may - // evict segments from a device. This is only used as a starting point for the delete. - int toStartTier = FindClosestDeviceContaining(toSegment); - - - // Delete callback should not be invoked until all deletes are completed - var countdown = new CountdownEvent(devices.Count - toStartTier); - - // This is assuming that there are enough physical space left on the device to accomodate more data than the specified capacity --- - // concurrent writes may happen before deletes are completed - for (int i = toStartTier; i < devices.Count; i++) - { - // Attempt to monotonically update the range stored by the tier before calling delete. If monotonic update fails, - // all deletes that needed to be invoked are already called by other threads, so skip this tier. - if (!Utility.MonotonicUpdate(ref tierStartSegment[i], toSegment, out int oldValue)) continue; - - // Otherwise, this function has atomically removed range [oldValue, toSegment). The segments in range [fromSegment, oldValue) are - // deleted by other concurrent threads, so we should not invoke delete on those. 
When calling delete, we use epoch protection to make - // sure no active readers are accessing the deleted segments before invoking delete. - // TODO(Tianyu): Is this too wasteful in terms of checking out epochs? - epoch.BumpCurrentEpoch(() => - { - devices[i].DeleteSegmentRangeAsync(Math.Max(fromSegment, oldValue), toSegment, r => - { - if (countdown.Signal()) callback(asyncResult); - }, null); - }); - } - } - - public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { // This device is epoch-protected and cannot be stale while the operation is in flight @@ -126,16 +82,6 @@ public override void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destin public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - // Update the tail if the segment we are writing to is larger than the previous maximum - if (Utility.MonotonicUpdate(ref logTail, segmentId, out int originalTail)) - { - // If indeed we are writing a new segment, some devices may be out of space and require eviction. - for (int i = 0; i < devices.Count; i++) - { - // Instead of updating range using segmentId - UpdateDeviceRangeOnNewSegment(i); - } - } int startTier = FindClosestDeviceContaining(segmentId); // TODO(Tianyu): Can you ever initiate a write that is after the commit point? Given FASTER's model of a read-only region, this will probably never happen. @@ -157,15 +103,25 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulon } else { - // TODO(Tianyu): We may need some type of count down to verify that all writes are finished before closing a device. - // Some device may already provide said guarantee, however. 
- // Otherwise, simply issue the write without caring about callbacks devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) => { }, null); } } } + public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) + { + int startTier = FindClosestDeviceContaining(segment); + var countdown = new CountdownEvent(devices.Count); + for(int i = startTier; i < devices.Count; i++) + { + devices[i].RemoveSegmentAsync(segment, r => + { + if (countdown.Signal()) callback(r); + }, result); + } + } + private static long ComputeCapacity(IList devices) { long result = 0; @@ -198,44 +154,16 @@ private static string ComputeFileString(IList devices, int commitPoint) return result.ToString(); } - private int FindClosestDeviceContaining(long address) + private int FindClosestDeviceContaining(int segment) { // Can use binary search, but 1) it might not be faster than linear on a array assumed small, and 2) C# built in does not guarantee first element is returned on duplicates. // Therefore we are sticking to the simpler approach at first. for (int i = 0; i < devices.Count; i++) { - if (tierStartSegment[i] <= address) return i; + if (devices[i].StartSegment <= segment) return i; } // TODO(Tianyu): This exception should never be triggered if we enforce that the last tier has unbounded storage. 
throw new ArgumentException("No such address exists"); } - - private void UpdateDeviceRangeOnNewSegment(int tier) - { - IDevice device = devices[tier]; - // Never need to update range if storage is unbounded - if (device.Capacity == Devices.CAPACITY_UNSPECIFIED) return; - - // Attempt to update the stored range until there are enough space on the tier to accomodate the current logTail - int oldStartSegment, currentTail, newStartSegment; - do - { - oldStartSegment = tierStartSegment[tier]; - currentTail = logTail; - // No need to update if still within capacity; - if ((currentTail - oldStartSegment) * segmentSize <= device.Capacity) return; - // TODO(Tianyu): Can probably use a bit shift instead, but that is private on the base - newStartSegment = currentTail - (int)(device.Capacity / segmentSize); - - } while (Interlocked.CompareExchange(ref tierStartSegment[tier], newStartSegment, oldStartSegment) != oldStartSegment); - - // This action needs to be epoch-protected because readers may be issuing reads to the deleted segment, unaware of the delete. - // Because of earlier compare-and-swap, the caller has exclusive access to the range [oldStartSegment, newStartSegment), and there will - // be no double deletes. - epoch.BumpCurrentEpoch(() => device.DeleteSegmentRangeAsync(oldStartSegment, newStartSegment, r => { }, null)); - // We are assuming that the capacity given to a storage tier is not the physical capacity of the underlying device --- there will always be enough space to - // write extra segments while deletes are underway. If this assumption is not true, we will need to perform any writes in the callback of the delete. 
- } - } } diff --git a/cs/src/core/Utilities/Utility.cs b/cs/src/core/Utilities/Utility.cs index 375b20cfc..94360a7f2 100644 --- a/cs/src/core/Utilities/Utility.cs +++ b/cs/src/core/Utilities/Utility.cs @@ -246,13 +246,13 @@ public static bool MonotonicUpdate(ref long variable, long newValue, out long ol /// The variable to possibly replace /// The value that replaces the variable if successful /// The orignal value in the variable - /// if oldValue less than newValue + /// if oldValue less than or equal to newValue public static bool MonotonicUpdate(ref int variable, int newValue, out int oldValue) { do { oldValue = variable; - if (oldValue > newValue) return false; + if (oldValue >= newValue) return false; } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); return true; } From abd73e99723e977cf9a9a00a7b7ce4ccce5ce8b8 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 12 Jul 2019 11:39:40 -0700 Subject: [PATCH 29/56] Move AzurePageBlobDevice into a separate solution. 
--- cs/FASTER.sln | 14 +++ cs/src/cloud/AzurePageBlobDevice.cs | 137 ++++++++++++++++++++++++++++ cs/src/cloud/FASTER.cloud.csproj | 48 ++++++++++ cs/src/core/Device/Devices.cs | 21 ----- cs/src/core/FASTER.core.csproj | 1 - cs/test/BasicDiskFASTERTests.cs | 6 +- cs/test/FASTER.test.csproj | 1 + 7 files changed, 205 insertions(+), 23 deletions(-) create mode 100644 cs/src/cloud/AzurePageBlobDevice.cs create mode 100644 cs/src/cloud/FASTER.cloud.csproj diff --git a/cs/FASTER.sln b/cs/FASTER.sln index 9424c9738..e52a2c076 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -38,6 +38,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ClassCacheMT", "playground\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VarLenStructSample", "playground\VarLenStructSample\VarLenStructSample.csproj", "{37B3C501-A7A1-4E86-B766-22F9BEF31DFE}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "cloud", "cloud", "{A6B14415-D316-4955-BE5F-725BB2DEBEBE}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.cloud", "src\cloud\FASTER.cloud.csproj", "{ECF8EE9C-0D02-4EB3-8A25-6A318719F029}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -134,6 +138,14 @@ Global {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|Any CPU.Build.0 = Release|x64 {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|x64.ActiveCfg = Release|x64 {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|x64.Build.0 = Release|x64 + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|x64.ActiveCfg = Debug|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|x64.Build.0 = Debug|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|Any CPU.Build.0 = Release|Any CPU + 
{ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|x64.ActiveCfg = Release|Any CPU + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -151,6 +163,8 @@ Global {079F8DF4-96D4-41AC-AD04-308FDF70E371} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {F989FF23-5DD7-4D8F-9458-BDA22EFC038D} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {37B3C501-A7A1-4E86-B766-22F9BEF31DFE} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {A6B14415-D316-4955-BE5F-725BB2DEBEBE} = {28800357-C8CE-4CD0-A2AD-D4A910ABB496} + {ECF8EE9C-0D02-4EB3-8A25-6A318719F029} = {A6B14415-D316-4955-BE5F-725BB2DEBEBE} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A0750637-2CCB-4139-B25E-F2CE740DCFAC} diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs new file mode 100644 index 000000000..c31611a49 --- /dev/null +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Collections.Concurrent; +using System.IO; +using System.Threading; +using FASTER.core; +using Microsoft.Azure.Storage; +using Microsoft.Azure.Storage.Blob; + +namespace FASTER.cloud +{ + /// + /// A IDevice Implementation that is backed byAzure Page Blob. + /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. + /// + public class AzurePageBlobDevice : StorageDeviceBase + { + private CloudBlobContainer container; + private readonly ConcurrentDictionary blobs; + private readonly string blobName; + private readonly bool deleteOnClose; + + // Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB + private const long MAX_BLOB_SIZE = (long)(2 * 10e8); + // Azure Page Blobs have a fixed sector size of 512 bytes. 
+ private const uint PAGE_BLOB_SECTOR_SIZE = 512; + + /// + /// Constructs a new AzurePageBlobDevice instance + /// + /// The connection string to use when estblishing connection to Azure Blobs + /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created + /// A descriptive name that will be the prefix of all blobs created with this device + /// + /// True if the program should delete all blobs created on call to Close. False otherwise. + /// The container is not deleted even if it was created in this constructor + /// + public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) + : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) + { + CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); + CloudBlobClient client = storageAccount.CreateCloudBlobClient(); + container = client.GetContainerReference(containerName); + container.CreateIfNotExists(); + blobs = new ConcurrentDictionary(); + this.blobName = blobName; + this.deleteOnClose = deleteOnClose; + } + + /// + /// Inherited + /// + public override void Close() + { + // Unlike in LocalStorageDevice, we explicitly remove all page blobs if the deleteOnClose flag is set, instead of relying on the operating system + // to delete files after the end of our process. This leads to potential problems if multiple instances are sharing the same underlying page blobs. + // + // Since this flag is presumably only used for testing though, it is probably fine. 
+ if (deleteOnClose) + { + foreach (var entry in blobs) + { + entry.Value.Delete(); + } + } + } + /// + /// Inherited + /// + public override void DeleteSegmentRange(int fromSegment, int toSegment) + { + for (int i = fromSegment; i < toSegment; i++) + { + if (blobs.TryRemove(i, out CloudPageBlob blob)) + { + blob.Delete(); + } + } + } + /// + /// Inherited + /// + public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) + { + CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + + // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API + Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); + NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); + + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength, readLength, FileAccess.Write); + + // TODO(Tianyu): This implementation seems to swallow exceptions that would otherwise be thrown from the synchronous version of this + // function. I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. 
+ pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); + } + /// + /// Inherited + /// + public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + { + CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + + // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API + Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); + NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); + pageBlob.BeginWritePages(stream, (long)destinationAddress, null, ar => callback(0, numBytesToWrite, ovNative), asyncResult); + } + + private CloudPageBlob GetOrAddPageBlob(int segmentId) + { + return blobs.GetOrAdd(segmentId, id => CreatePageBlob(id)); + } + + private CloudPageBlob CreatePageBlob(int segmentId) + { + // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? + // TODO(Tianyu): Need a better naming scheme? + CloudPageBlob blob = container.GetPageBlobReference(blobName + segmentId); + + // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because + // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of + // how large it can grow to. + var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; + + // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. + // How that should be fixed is dependent on our decision on the architecture. 
+ blob.Create(size); + return blob; + } + } + + +} diff --git a/cs/src/cloud/FASTER.cloud.csproj b/cs/src/cloud/FASTER.cloud.csproj new file mode 100644 index 000000000..393e979ab --- /dev/null +++ b/cs/src/cloud/FASTER.cloud.csproj @@ -0,0 +1,48 @@ + + + + netstandard2.0;net46 + AnyCPU;x64 + + + + true + FASTER.cloud + FASTER.cloud + prompt + true + + Library + + ../../FASTER.snk + false + bin\$(Platform)\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml + + + + TRACE;DEBUG + full + bin\$(Platform)\Debug\ + + + TRACE + pdbonly + true + bin\$(Platform)\Release\ + + + $(DefineConstants);DOTNETCORE + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 68fb5a02a..c4ee7cdbc 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -13,9 +13,6 @@ namespace FASTER.core /// public static class Devices { - private const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; - private const string TEST_CONTAINER = "test"; - /// /// Create a storage device for the log /// @@ -43,24 +40,6 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru } return logDevice; } - - // TODO(Tianyu): How do we want to integrate the option of using AzurePageBlobDevice into the original static factory class? We can either follow the original pattern and somehow encode this in the string path argument, - // or use concrete factories that are initialized per instance to only create one type. - /// - /// Creates a log device backed by Azure Page Blob. - /// - /// A descriptive name that will be the prefix of all blobs created with this device - /// The connection string to use when estblishing connection to Azure Blobs - /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created - /// - /// True if the program should delete all blobs created on call to Close. False otherwise. 
- /// The container is not deleted even if it was created in this constructor - /// - /// The constructed Device instance - public static IDevice CreateAzurePageBlobDevice(string blobName, string connectionString = EMULATED_STORAGE_STRING, string containerName = TEST_CONTAINER, bool deleteOnClose = false) - { - return new AzurePageBlobDevice(connectionString, containerName, blobName, deleteOnClose); - } } diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index 57157d97d..c2dbeca27 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -35,7 +35,6 @@ - diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index c819ce083..dc99eb445 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -10,16 +10,20 @@ using FASTER.core; using System.IO; using NUnit.Framework; +using FASTER.cloud; namespace FASTER.test { + // TODO(Tianyu): Now that we are also testing device with Azure Page Blobs here, should we also rename the test? 
[TestFixture] internal class BasicDiskFASTERTests { private FasterKV fht; private IDevice log; + public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; + public const string TEST_CONTAINER = "test"; void TestDeviceWriteRead(IDevice log) { @@ -93,7 +97,7 @@ public void NativeDiskWriteRead() [Test] public void PageBlobWriteRead() { - TestDeviceWriteRead(Devices.CreateAzurePageBlobDevice("BasicDiskFASTERTests", deleteOnClose: false)); + TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); } } } diff --git a/cs/test/FASTER.test.csproj b/cs/test/FASTER.test.csproj index a6c0cc437..0a4d9cec3 100644 --- a/cs/test/FASTER.test.csproj +++ b/cs/test/FASTER.test.csproj @@ -43,6 +43,7 @@ + From cc02d0e6ed9b37e747e7a062158fc3767dfde1b7 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Fri, 12 Jul 2019 14:46:54 -0700 Subject: [PATCH 30/56] Remove stale file --- cs/src/core/Device/AzurePageBlobDevice.cs | 136 ---------------------- 1 file changed, 136 deletions(-) delete mode 100644 cs/src/core/Device/AzurePageBlobDevice.cs diff --git a/cs/src/core/Device/AzurePageBlobDevice.cs b/cs/src/core/Device/AzurePageBlobDevice.cs deleted file mode 100644 index 30ffea0f4..000000000 --- a/cs/src/core/Device/AzurePageBlobDevice.cs +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using System; -using System.Collections.Concurrent; -using System.IO; -using System.Threading; -using Microsoft.Azure.Storage; -using Microsoft.Azure.Storage.Blob; - -namespace FASTER.core -{ - /// - /// A IDevice Implementation that is backed byAzure Page Blob. - /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. 
- /// - public class AzurePageBlobDevice : StorageDeviceBase - { - private CloudBlobContainer container; - private readonly ConcurrentDictionary blobs; - private readonly string blobName; - private readonly bool deleteOnClose; - - // Page Blobs permit blobs of max size 8 TB, but the emulator permits only 2 GB - private const long MAX_BLOB_SIZE = (long)(2 * 10e8); - // Azure Page Blobs have a fixed sector size of 512 bytes. - private const uint PAGE_BLOB_SECTOR_SIZE = 512; - - /// - /// Constructs a new AzurePageBlobDevice instance - /// - /// The connection string to use when estblishing connection to Azure Blobs - /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created - /// A descriptive name that will be the prefix of all blobs created with this device - /// - /// True if the program should delete all blobs created on call to Close. False otherwise. - /// The container is not deleted even if it was created in this constructor - /// - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) - : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) - { - CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); - CloudBlobClient client = storageAccount.CreateCloudBlobClient(); - container = client.GetContainerReference(containerName); - container.CreateIfNotExists(); - blobs = new ConcurrentDictionary(); - this.blobName = blobName; - this.deleteOnClose = deleteOnClose; - } - - /// - /// Inherited - /// - public override void Close() - { - // Unlike in LocalStorageDevice, we explicitly remove all page blobs if the deleteOnClose flag is set, instead of relying on the operating system - // to delete files after the end of our process. This leads to potential problems if multiple instances are sharing the same underlying page blobs. 
- // - // Since this flag is presumably only used for testing though, it is probably fine. - if (deleteOnClose) - { - foreach (var entry in blobs) - { - entry.Value.Delete(); - } - } - } - /// - /// Inherited - /// - public override void DeleteSegmentRange(int fromSegment, int toSegment) - { - for (int i = fromSegment; i < toSegment; i++) - { - if (blobs.TryRemove(i, out CloudPageBlob blob)) - { - blob.Delete(); - } - } - } - /// - /// Inherited - /// - public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) - { - CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); - - // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API - Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); - NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); - - UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength, readLength, FileAccess.Write); - - // TODO(Tianyu): This implementation seems to swallow exceptions that would otherwise be thrown from the synchronous version of this - // function. I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. 
- pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); - } - /// - /// Inherited - /// - public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) - { - CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); - - // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API - Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); - NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); - UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); - pageBlob.BeginWritePages(stream, (long)destinationAddress, null, ar => callback(0, numBytesToWrite, ovNative), asyncResult); - } - - private CloudPageBlob GetOrAddPageBlob(int segmentId) - { - return blobs.GetOrAdd(segmentId, id => CreatePageBlob(id)); - } - - private CloudPageBlob CreatePageBlob(int segmentId) - { - // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? - // TODO(Tianyu): Need a better naming scheme? - CloudPageBlob blob = container.GetPageBlobReference(blobName + segmentId); - - // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because - // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of - // how large it can grow to. - var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; - - // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. - // How that should be fixed is dependent on our decision on the architecture. 
- blob.Create(size); - return blob; - } - } - - -} From 438a40c3fe51e633f8d18d808b32cd91dc5b0065 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 10:34:50 -0700 Subject: [PATCH 31/56] Better exception handling. Dealing with contention on blob creation. --- cs/src/cloud/AzurePageBlobDevice.cs | 71 ++++++++++++++++++----------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index c31611a49..14e229f4e 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -84,7 +84,8 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) /// public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { - CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + // It is up to the allocator to make sure no reads are issued to segments before they are written + if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) throw new InvalidOperationException("Attempting to read non-existent segments"); // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); @@ -94,42 +95,60 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr // TODO(Tianyu): This implementation seems to swallow exceptions that would otherwise be thrown from the synchronous version of this // function. I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. 
- pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => callback(0, readLength, ovNative), asyncResult); + pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => { + // Should propagate any exceptions + pageBlob.EndDownloadRangeToStream(ar); + callback(0, readLength, ovNative); + }, asyncResult); } /// /// Inherited /// public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - CloudPageBlob pageBlob = GetOrAddPageBlob(segmentId); + if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) + { - // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API - Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); - NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); - UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); - pageBlob.BeginWritePages(stream, (long)destinationAddress, null, ar => callback(0, numBytesToWrite, ovNative), asyncResult); - } + // If no blob exists for the segment, we must first create the segment asynchronouly. (Create call takes ~70 ms by measurement) + pageBlob = container.GetPageBlobReference(blobName + segmentId); - private CloudPageBlob GetOrAddPageBlob(int segmentId) - { - return blobs.GetOrAdd(segmentId, id => CreatePageBlob(id)); + // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because + // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of + // how large it can grow to. + var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; + // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. 
+ if (blobs.TryAdd(segmentId, pageBlob)) + { + pageBlob.BeginCreate(size, ar => { + pageBlob.EndCreate(ar); + WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + }, null); + } + else + { + // Some other thread beat us to calling create, should use their handle to invoke write directly instead + WriteToBlobAsync(blobs[segmentId], sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + } + } + else + { + // Write directly to the existing blob otherwise + WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + } } - private CloudPageBlob CreatePageBlob(int segmentId) + private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAddress, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - // TODO(Tianyu): Is this now blocking? How sould this work when multiple apps share the same backing blob store? - // TODO(Tianyu): Need a better naming scheme? - CloudPageBlob blob = container.GetPageBlobReference(blobName + segmentId); - - // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because - // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of - // how large it can grow to. - var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; - - // TODO(Tianyu): There is a race hidden here if multiple applications are interacting with the same underlying blob store. - // How that should be fixed is dependent on our decision on the architecture. 
- blob.Create(size); - return blob; + // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API + Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); + NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); + UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); + blob.BeginWritePages(stream, (long)destinationAddress, null, ar => + { + // Should propagate any exceptions + blob.EndWritePages(ar); + callback(0, numBytesToWrite, ovNative); + }, asyncResult); } } From 137329007ff7ea70f0eabf743ca4066783c40eb1 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 13:27:15 -0700 Subject: [PATCH 32/56] minor cleanups --- cs/src/cloud/AzurePageBlobDevice.cs | 2 +- cs/src/core/Device/Devices.cs | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 14e229f4e..c3bc09e83 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -104,7 +104,7 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr /// /// Inherited /// - public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) { diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index c4ee7cdbc..9428fd848 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -22,7 +22,6 @@ public static class Devices /// Device instance public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false) { - if 
(string.IsNullOrWhiteSpace(logPath)) return new NullDevice(); @@ -41,6 +40,4 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru return logDevice; } } - - } From 34b75e6a17612b5cfefdba9560553fccad14bbfd Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 14:13:53 -0700 Subject: [PATCH 33/56] Add debug prints to debug Azure pipeline timeout --- cs/test/BasicDiskFASTERTests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index dc99eb445..223e6d2af 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -97,7 +97,11 @@ public void NativeDiskWriteRead() [Test] public void PageBlobWriteRead() { - TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); + for (int i = 0; i < 10; i++) + { + TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); + if (i != 9) TearDown(); + } } } } From ce131be74e4ebb7e20f7d2241e846c391a7fcff2 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 14:31:29 -0700 Subject: [PATCH 34/56] Handle silent failure of exception in background thread. 
--- cs/src/cloud/AzurePageBlobDevice.cs | 33 +++++++++++++++++++++++------ cs/test/BasicDiskFASTERTests.cs | 12 +++++------ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index c3bc09e83..760018387 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -92,13 +92,22 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength, readLength, FileAccess.Write); - - // TODO(Tianyu): This implementation seems to swallow exceptions that would otherwise be thrown from the synchronous version of this - // function. I wasn't able to find any good documentaiton on how exceptions are propagated or handled in this scenario. pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => { - // Should propagate any exceptions - pageBlob.EndDownloadRangeToStream(ar); + try + { + pageBlob.EndDownloadRangeToStream(ar); + } + // I don't think I can be more specific in catch here because no documentation on exception behavior is provided + catch (Exception e) + { + Overlapped.Free(ovNative); + // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error + // but does not distinguish between them. 
+ callback(1, readLength, ovNative); + } callback(0, readLength, ovNative); + + }, asyncResult); } /// @@ -145,8 +154,18 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)sourceAddress, numBytesToWrite); blob.BeginWritePages(stream, (long)destinationAddress, null, ar => { - // Should propagate any exceptions - blob.EndWritePages(ar); + try + { + blob.EndWritePages(ar); + } + // I don't think I can be more specific in catch here because no documentation on exception behavior is provided + catch (Exception e) + { + Overlapped.Free(ovNative); + // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error + // but does not distinguish between them. + callback(1, numBytesToWrite, ovNative); + } callback(0, numBytesToWrite, ovNative); }, asyncResult); } diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index 223e6d2af..bcc180d28 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -11,6 +11,7 @@ using System.IO; using NUnit.Framework; using FASTER.cloud; +using System.Diagnostics; namespace FASTER.test { @@ -25,7 +26,7 @@ internal class BasicDiskFASTERTests public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; public const string TEST_CONTAINER = "test"; - void TestDeviceWriteRead(IDevice log) + void TestDeviceWriteRead(IDevice log, bool debug = false) { this.log = log; fht = new FasterKV @@ -36,11 +37,13 @@ void TestDeviceWriteRead(IDevice log) for (int i = 0; i < 2000; i++) { + if (debug && i % 500 == 0) Debug.Print("inserted {0} tuples\n", i); var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; fht.Upsert(ref key1, ref value, Empty.Default, 0); } fht.CompletePending(true); + if (debug) Debug.Print("Write pending cleared"); // Update first 100 using RMW from 
storage for (int i = 0; i < 100; i++) @@ -50,6 +53,7 @@ void TestDeviceWriteRead(IDevice log) var status = fht.RMW(ref key1, ref input, Empty.Default, 0); if (status == Status.PENDING) fht.CompletePending(true); + if (debug && i % 10 == 0) Debug.Print("Modified {0} tuples\n", i); } @@ -97,11 +101,7 @@ public void NativeDiskWriteRead() [Test] public void PageBlobWriteRead() { - for (int i = 0; i < 10; i++) - { - TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); - if (i != 9) TearDown(); - } + TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false), true); } } } From fb4a438a482b595d1cb3e8ed70b48f233f195f19 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 14:48:42 -0700 Subject: [PATCH 35/56] does this work? --- cs/test/BasicDiskFASTERTests.cs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index bcc180d28..049020596 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -21,11 +21,22 @@ namespace FASTER.test [TestFixture] internal class BasicDiskFASTERTests { + private TestContext testContextInstance; private FasterKV fht; private IDevice log; public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; public const string TEST_CONTAINER = "test"; + /// + /// Gets or sets the test context which provides + /// information about and functionality for the current test run. 
+ /// + public TestContext TestContext + { + get { return testContextInstance; } + set { testContextInstance = value; } + } + void TestDeviceWriteRead(IDevice log, bool debug = false) { this.log = log; @@ -37,13 +48,13 @@ void TestDeviceWriteRead(IDevice log, bool debug = false) for (int i = 0; i < 2000; i++) { - if (debug && i % 500 == 0) Debug.Print("inserted {0} tuples\n", i); + if (debug && i % 500 == 0) TestContext.WriteLine("inserted {0} tuples", i); var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; fht.Upsert(ref key1, ref value, Empty.Default, 0); } fht.CompletePending(true); - if (debug) Debug.Print("Write pending cleared"); + if (debug) TestContext.WriteLine("Write pending cleared"); // Update first 100 using RMW from storage for (int i = 0; i < 100; i++) @@ -53,7 +64,7 @@ void TestDeviceWriteRead(IDevice log, bool debug = false) var status = fht.RMW(ref key1, ref input, Empty.Default, 0); if (status == Status.PENDING) fht.CompletePending(true); - if (debug && i % 10 == 0) Debug.Print("Modified {0} tuples\n", i); + if (debug && i % 10 == 0) TestContext.WriteLine("Modified {0} tuples\n", i); } From 28249d791b2853715b4dcc915f6156f34e0667a5 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 15:18:35 -0700 Subject: [PATCH 36/56] Revert to try and locate source of Azure pipeline failure --- cs/src/cloud/AzurePageBlobDevice.cs | 39 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 760018387..07b93fd7d 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -66,6 +66,7 @@ public override void Close() } } } + /// /// Inherited /// @@ -79,6 +80,7 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) } } } + /// /// Inherited /// @@ -106,10 +108,9 @@ public override unsafe void ReadAsync(int segmentId, 
ulong sourceAddress, IntPtr callback(1, readLength, ovNative); } callback(0, readLength, ovNative); - - }, asyncResult); } + /// /// Inherited /// @@ -126,24 +127,26 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // how large it can grow to. var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. - if (blobs.TryAdd(segmentId, pageBlob)) - { - pageBlob.BeginCreate(size, ar => { - pageBlob.EndCreate(ar); - WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - }, null); - } - else - { - // Some other thread beat us to calling create, should use their handle to invoke write directly instead - WriteToBlobAsync(blobs[segmentId], sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - } + // if (blobs.TryAdd(segmentId, pageBlob)) + // { + // pageBlob.BeginCreate(size, ar => { + // pageBlob.EndCreate(ar); + // WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + // }, null); + // } + // else + // { + // Some other thread beat us to calling create, should use their handle to invoke write directly instead + // WriteToBlobAsync(blobs[segmentId], sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + // } + pageBlob.Create(size); + blobs.TryAdd(segmentId, pageBlob); } - else - { - // Write directly to the existing blob otherwise + // else + // { + // Write directly to the existing blob otherwise WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - } + // } } private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAddress, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) From db640379d59350d494aacbd70ea8648901d87159 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: 
Mon, 15 Jul 2019 15:27:52 -0700 Subject: [PATCH 37/56] more attempts --- cs/src/cloud/AzurePageBlobDevice.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 07b93fd7d..0a243a6e5 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -139,8 +139,8 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // Some other thread beat us to calling create, should use their handle to invoke write directly instead // WriteToBlobAsync(blobs[segmentId], sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); // } - pageBlob.Create(size); blobs.TryAdd(segmentId, pageBlob); + pageBlob.Create(size); } // else // { From 5b48ecd886a09946a9224892598124ebd3f96f8d Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 15:48:25 -0700 Subject: [PATCH 38/56] More attempts --- cs/src/cloud/AzurePageBlobDevice.cs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 0a243a6e5..b100f00c2 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -118,7 +118,6 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti { if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) { - // If no blob exists for the segment, we must first create the segment asynchronouly. (Create call takes ~70 ms by measurement) pageBlob = container.GetPageBlobReference(blobName + segmentId); @@ -127,26 +126,18 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // how large it can grow to. var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. 
- // if (blobs.TryAdd(segmentId, pageBlob)) - // { - // pageBlob.BeginCreate(size, ar => { - // pageBlob.EndCreate(ar); - // WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - // }, null); - // } - // else - // { - // Some other thread beat us to calling create, should use their handle to invoke write directly instead - // WriteToBlobAsync(blobs[segmentId], sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - // } + pageBlob.BeginCreate(size, ar => + { + pageBlob.EndCreate(ar); + WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + }, null); blobs.TryAdd(segmentId, pageBlob); - pageBlob.Create(size); } - // else - // { + else + { // Write directly to the existing blob otherwise WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - // } + } } private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAddress, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) From ad671f60941c328ab2442c9a157c30a031cca652 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 15:57:21 -0700 Subject: [PATCH 39/56] More attempts --- cs/src/cloud/AzurePageBlobDevice.cs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index b100f00c2..0872af375 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -128,7 +128,15 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. 
pageBlob.BeginCreate(size, ar => { - pageBlob.EndCreate(ar); + try + { + pageBlob.EndCreate(ar); + + } + catch (Exception e) + { + // Ignore, WTF + } WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); }, null); blobs.TryAdd(segmentId, pageBlob); From 1adc2d021b066ac6cf81da9884efca239e8e6532 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 16:15:11 -0700 Subject: [PATCH 40/56] More attempts --- cs/src/cloud/AzurePageBlobDevice.cs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 0872af375..7b7a050e7 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -102,7 +102,6 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr // I don't think I can be more specific in catch here because no documentation on exception behavior is provided catch (Exception e) { - Overlapped.Free(ovNative); // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. callback(1, readLength, ovNative); @@ -128,17 +127,9 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. 
pageBlob.BeginCreate(size, ar => { - try - { - pageBlob.EndCreate(ar); - - } - catch (Exception e) - { - // Ignore, WTF - } - WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + pageBlob.EndCreate(ar); }, null); + WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); blobs.TryAdd(segmentId, pageBlob); } else @@ -163,7 +154,6 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd // I don't think I can be more specific in catch here because no documentation on exception behavior is provided catch (Exception e) { - Overlapped.Free(ovNative); // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. callback(1, numBytesToWrite, ovNative); From 9c189d15ce6e1f48b2b6252c159c6272bdc47559 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 16:25:13 -0700 Subject: [PATCH 41/56] More attempts --- cs/src/core/Allocator/AllocatorBase.cs | 6 +++--- cs/src/core/Allocator/GenericAllocator.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 558ac6f52..c2cf8dba8 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -1473,7 +1473,7 @@ private void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, NativeOverl { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } var result = (AsyncGetFromDiskResult>)Overlapped.Unpack(overlap).AsyncResult; @@ -1532,7 +1532,7 @@ private void AsyncFlushPageCallback(uint errorCode, uint numBytes, NativeOverlap { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: 
{0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -1575,7 +1575,7 @@ private void AsyncFlushPageToDeviceCallback(uint errorCode, uint numBytes, Nativ { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } PageAsyncFlushResult result = (PageAsyncFlushResult)Overlapped.Unpack(overlap).AsyncResult; diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 3e8ac26c0..b6e136c3a 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -445,7 +445,7 @@ private void AsyncReadPageCallback(uint errorCode, uint numBytes, NativeOverlapp { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -494,7 +494,7 @@ private void AsyncFlushPartialObjectLogCallback(uint errorCode, uint n { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -508,7 +508,7 @@ private void AsyncReadPageWithObjectsCallback(uint errorCode, uint num { if (errorCode != 0) { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } PageAsyncReadResult result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; From 705c91ce81945ca6f0f81556493a2e70d2859bf8 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Mon, 15 Jul 2019 18:10:25 -0700 Subject: [PATCH 42/56] Try to differentiate between 
read error and write error --- cs/src/cloud/AzurePageBlobDevice.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 7b7a050e7..aded22303 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Concurrent; +using System.Diagnostics; using System.IO; using System.Threading; using FASTER.core; @@ -104,7 +105,7 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr { // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. - callback(1, readLength, ovNative); + callback(2, readLength, ovNative); } callback(0, readLength, ovNative); }, asyncResult); From 728c6512836cce07c2a848bfe0fa2533588a94ec Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 16 Jul 2019 08:37:50 -0700 Subject: [PATCH 43/56] print exception message --- cs/src/cloud/AzurePageBlobDevice.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index aded22303..95e61c2a0 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -155,6 +155,7 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd // I don't think I can be more specific in catch here because no documentation on exception behavior is provided catch (Exception e) { + Console.WriteLine(e.Message); // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. 
callback(1, numBytesToWrite, ovNative); From e46ea6e860b73fb19594eceedfb98f05eb6b2055 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 10 Jul 2019 14:23:08 -0700 Subject: [PATCH 44/56] Pick out the MonotonicUpdate function into utility (cherry picked from commit 0a8742202e8aac7e4aa6541c78be502c02f4ae54) --- cs/src/core/Allocator/AllocatorBase.cs | 51 +++++++------------------- cs/src/core/Utilities/Utility.cs | 34 +++++++++++++++++ 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index c2cf8dba8..13d6061ed 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -870,7 +870,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) tailAddress = GetTailAddress(); long localTailAddress = tailAddress; long currentReadOnlyOffset = ReadOnlyAddress; - if (MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress, false)); } @@ -882,7 +882,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) /// public bool ShiftReadOnlyAddress(long newReadOnlyAddress) { - if (MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(newReadOnlyAddress, false)); return true; @@ -897,19 +897,19 @@ public bool ShiftReadOnlyAddress(long newReadOnlyAddress) public void ShiftBeginAddress(long newBeginAddress) { // First update the begin address - MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); + Utility.MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); // Then the head address - var h = MonotonicUpdate(ref HeadAddress, 
newBeginAddress, out long old); + var h = Utility.MonotonicUpdate(ref HeadAddress, newBeginAddress, out long old); // Finally the read-only address - var r = MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); + var r = Utility.MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); // Clean up until begin address epoch.BumpCurrentEpoch(() => { if (r) { - MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); - MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); + Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); + Utility.MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); } if (h) OnPagesClosed(newBeginAddress); @@ -935,7 +935,7 @@ protected virtual void DeleteAddressRange(long fromAddress, long toAddress) /// public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendingFlushComplete = false) { - if (MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) + if (Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) { Debug.WriteLine("SafeReadOnly shifted from {0:X} to {1:X}", oldSafeReadOnlyAddress, newSafeReadOnlyAddress); long startPage = oldSafeReadOnlyAddress >> LogPageSizeBits; @@ -964,7 +964,7 @@ public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendi /// public void OnPagesClosed(long newSafeHeadAddress) { - if (MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) + if (Utility.MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) { Debug.WriteLine("SafeHeadOffset shifted from {0:X} to {1:X}", oldSafeHeadAddress, newSafeHeadAddress); @@ -1020,7 +1020,7 @@ private void PageAlignedShiftReadOnlyAddress(long currentTailAddress) long currentReadOnlyAddress = ReadOnlyAddress; long pageAlignedTailAddress = currentTailAddress & ~PageSizeMask; long 
desiredReadOnlyAddress = (pageAlignedTailAddress - ReadOnlyLagAddress); - if (MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) + if (Utility.MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) { Debug.WriteLine("Allocate: Moving read-only offset from {0:X} to {1:X}", oldReadOnlyAddress, desiredReadOnlyAddress); epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(desiredReadOnlyAddress)); @@ -1050,7 +1050,7 @@ private void PageAlignedShiftHeadAddress(long currentTailAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1075,7 +1075,7 @@ public long ShiftHeadAddress(long desiredHeadAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1104,35 +1104,10 @@ protected void ShiftFlushedUntilAddress() if (update) { - MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); + Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); } } - - - /// - /// Used by several functions to update the variable to newValue. Ignores if newValue is smaller or - /// than the current value. 
- /// - /// - /// - /// - /// - private bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) - { - oldValue = variable; - while (oldValue < newValue) - { - var foundValue = Interlocked.CompareExchange(ref variable, newValue, oldValue); - if (foundValue == oldValue) - { - return true; - } - oldValue = foundValue; - } - return false; - } - /// /// Reset for recovery /// diff --git a/cs/src/core/Utilities/Utility.cs b/cs/src/core/Utilities/Utility.cs index 57ab7168f..375b20cfc 100644 --- a/cs/src/core/Utilities/Utility.cs +++ b/cs/src/core/Utilities/Utility.cs @@ -223,5 +223,39 @@ internal static int Murmur3(int h) a ^= a >> 16; return (int)a; } + + /// + /// Updates the variable to newValue only if the current value is smaller than the new value. + /// + /// The variable to possibly replace + /// The value that replaces the variable if successful + /// The orignal value in the variable + /// if oldValue less than newValue + public static bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) + { + do + { + oldValue = variable; + if (oldValue > newValue) return false; + } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); + return true; + } + /// + /// Updates the variable to newValue only if the current value is smaller than the new value. 
+ /// + /// The variable to possibly replace + /// The value that replaces the variable if successful + /// The orignal value in the variable + /// if oldValue less than newValue + public static bool MonotonicUpdate(ref int variable, int newValue, out int oldValue) + { + do + { + oldValue = variable; + if (oldValue > newValue) return false; + } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); + return true; + } + } } From 20aec4c5f43b351a86a5ebce4752aa9b5340bad9 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 16 Jul 2019 11:36:28 -0700 Subject: [PATCH 45/56] Revert "Pick out the MonotonicUpdate function into utility" This reverts commit e46ea6e860b73fb19594eceedfb98f05eb6b2055. --- cs/src/core/Allocator/AllocatorBase.cs | 51 +++++++++++++++++++------- cs/src/core/Utilities/Utility.cs | 34 ----------------- 2 files changed, 38 insertions(+), 47 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 13d6061ed..c2cf8dba8 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -870,7 +870,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) tailAddress = GetTailAddress(); long localTailAddress = tailAddress; long currentReadOnlyOffset = ReadOnlyAddress; - if (Utility.MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) + if (MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress, false)); } @@ -882,7 +882,7 @@ public void ShiftReadOnlyToTail(out long tailAddress) /// public bool ShiftReadOnlyAddress(long newReadOnlyAddress) { - if (Utility.MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) + if (MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) { epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(newReadOnlyAddress, false)); return true; 
@@ -897,19 +897,19 @@ public bool ShiftReadOnlyAddress(long newReadOnlyAddress) public void ShiftBeginAddress(long newBeginAddress) { // First update the begin address - Utility.MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); + MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); // Then the head address - var h = Utility.MonotonicUpdate(ref HeadAddress, newBeginAddress, out long old); + var h = MonotonicUpdate(ref HeadAddress, newBeginAddress, out long old); // Finally the read-only address - var r = Utility.MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); + var r = MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); // Clean up until begin address epoch.BumpCurrentEpoch(() => { if (r) { - Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); - Utility.MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); + MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); + MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); } if (h) OnPagesClosed(newBeginAddress); @@ -935,7 +935,7 @@ protected virtual void DeleteAddressRange(long fromAddress, long toAddress) /// public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendingFlushComplete = false) { - if (Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) + if (MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) { Debug.WriteLine("SafeReadOnly shifted from {0:X} to {1:X}", oldSafeReadOnlyAddress, newSafeReadOnlyAddress); long startPage = oldSafeReadOnlyAddress >> LogPageSizeBits; @@ -964,7 +964,7 @@ public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendi /// public void OnPagesClosed(long newSafeHeadAddress) { - if (Utility.MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) + if 
(MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) { Debug.WriteLine("SafeHeadOffset shifted from {0:X} to {1:X}", oldSafeHeadAddress, newSafeHeadAddress); @@ -1020,7 +1020,7 @@ private void PageAlignedShiftReadOnlyAddress(long currentTailAddress) long currentReadOnlyAddress = ReadOnlyAddress; long pageAlignedTailAddress = currentTailAddress & ~PageSizeMask; long desiredReadOnlyAddress = (pageAlignedTailAddress - ReadOnlyLagAddress); - if (Utility.MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) + if (MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) { Debug.WriteLine("Allocate: Moving read-only offset from {0:X} to {1:X}", oldReadOnlyAddress, desiredReadOnlyAddress); epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(desiredReadOnlyAddress)); @@ -1050,7 +1050,7 @@ private void PageAlignedShiftHeadAddress(long currentTailAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1075,7 +1075,7 @@ public long ShiftHeadAddress(long desiredHeadAddress) if (ReadCache && (newHeadAddress > HeadAddress)) EvictCallback(HeadAddress, newHeadAddress); - if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) { Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); @@ -1104,10 +1104,35 @@ protected void ShiftFlushedUntilAddress() if (update) { - 
Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); + MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); } } + + + /// + /// Used by several functions to update the variable to newValue. Ignores if newValue is smaller or + /// than the current value. + /// + /// + /// + /// + /// + private bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) + { + oldValue = variable; + while (oldValue < newValue) + { + var foundValue = Interlocked.CompareExchange(ref variable, newValue, oldValue); + if (foundValue == oldValue) + { + return true; + } + oldValue = foundValue; + } + return false; + } + /// /// Reset for recovery /// diff --git a/cs/src/core/Utilities/Utility.cs b/cs/src/core/Utilities/Utility.cs index 375b20cfc..57ab7168f 100644 --- a/cs/src/core/Utilities/Utility.cs +++ b/cs/src/core/Utilities/Utility.cs @@ -223,39 +223,5 @@ internal static int Murmur3(int h) a ^= a >> 16; return (int)a; } - - /// - /// Updates the variable to newValue only if the current value is smaller than the new value. - /// - /// The variable to possibly replace - /// The value that replaces the variable if successful - /// The orignal value in the variable - /// if oldValue less than newValue - public static bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) - { - do - { - oldValue = variable; - if (oldValue > newValue) return false; - } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); - return true; - } - /// - /// Updates the variable to newValue only if the current value is smaller than the new value. 
- /// - /// The variable to possibly replace - /// The value that replaces the variable if successful - /// The orignal value in the variable - /// if oldValue less than newValue - public static bool MonotonicUpdate(ref int variable, int newValue, out int oldValue) - { - do - { - oldValue = variable; - if (oldValue > newValue) return false; - } while (Interlocked.CompareExchange(ref variable, newValue, oldValue) != oldValue); - return true; - } - } } From 7e56bba00dfe2ab186ed2eab44003be87c7b6892 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 16 Jul 2019 15:26:54 -0700 Subject: [PATCH 46/56] Deal with creation race condition --- cs/src/cloud/AzurePageBlobDevice.cs | 150 +++++++++++++++++++++++----- 1 file changed, 123 insertions(+), 27 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 95e61c2a0..8f16f0691 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -5,6 +5,7 @@ using System.Collections.Concurrent; using System.Diagnostics; using System.IO; +using System.Runtime.CompilerServices; using System.Threading; using FASTER.core; using Microsoft.Azure.Storage; @@ -18,8 +19,95 @@ namespace FASTER.cloud /// public class AzurePageBlobDevice : StorageDeviceBase { + // This class bundles a page blob object with a queue and a counter to ensure + // 1) BeginCreate is not called more than once + // 2) No writes are issued before EndCreate + // The creator of a BlobEntry is responsible for populating the object with an underlying Page Blob. Any subsequent callers + // either directly write to the created page blob, or queues the write so the creator can clear it after creation is complete. + // In-progress creation is denoted by a null value on the underlying page blob + private class BlobEntry + { + private CloudPageBlob pageBlob; + private ConcurrentQueue> pendingWrites; + private int waitingCount; + + /// + /// Creates a new BlobEntry, does not initialize a page blob. 
Use + /// for actual creation. + /// + public BlobEntry() + { + pageBlob = null; + pendingWrites = new ConcurrentQueue>(); + waitingCount = 0; + } + + /// + /// Getter for the underlying + /// + /// the underlying , or null if there is none + public CloudPageBlob GetPageBlob() + { + return pageBlob; + } + + /// + /// Asynchronously invoke create on the given pageBlob. + /// + /// maximum size of the blob + /// The page blob to create + public void CreateAsync(long size, CloudPageBlob pageBlob) + { + Debug.Assert(waitingCount == 0, "Create should be called on blobs that don't already exist and exactly once"); + // Asynchronously create the blob + pageBlob.BeginCreate(size, ar => + { + // TODO(Tianyu): Need to wrap this for exceptions? + pageBlob.EndCreate(ar); + // At this point the blob is fully created. After this line all consequent writers will write immediately. We just + // need to clear the queue of pending writers. + this.pageBlob = pageBlob; + // Take a snapshot of the current waiting count. Exactly this many actions will be cleared. + // Swapping in -1 will inform any stragglers that we are not taking their actions and prompt them to retry (and call write directly) + int waitingCountSnapshot = Interlocked.Exchange(ref waitingCount, -1); + Action action; + // Clear actions + for (int i = 0; i < waitingCountSnapshot; i++) + { + // inserts into the queue may lag behind the creation thread. We have to wait until that happens. + // This is so rare, that we are probably okay with a busy wait. + while (!pendingWrites.TryDequeue(out action)) { } + action(pageBlob); + } + // Mark for deallocation for the GC + pendingWrites = null; + }, null); + } + + /// + /// Attempts to enqueue an action to be invoked by the creator after creation is done. Should only be invoked when + /// creation is in-flight. This call is allowed to fail (and return false) if concurrently the creation is complete. 
+ /// The caller should call the write action directly instead of queueing in this case. + /// + /// The write action to perform + /// Whether the action was successfully enqueued + public bool TryQueueAction(Action writeAction) + { + int currentCount; + do + { + currentCount = waitingCount; + // If current count became -1, creation is complete. New queue entries will not be processed and we must call the action ourselves. + if (currentCount == -1) return false; + } while (Interlocked.CompareExchange(ref waitingCount, currentCount + 1, currentCount) != currentCount); + // Enqueue last. The creation thread is obliged to wait until it has processed waitingCount many actions. + // It is extremely unlikely that we will get scheduled out here anyways. + pendingWrites.Enqueue(writeAction); + return true; + } + } private CloudBlobContainer container; - private readonly ConcurrentDictionary blobs; + private readonly ConcurrentDictionary blobs; private readonly string blobName; private readonly bool deleteOnClose; @@ -45,7 +133,7 @@ public AzurePageBlobDevice(string connectionString, string containerName, string CloudBlobClient client = storageAccount.CreateCloudBlobClient(); container = client.GetContainerReference(containerName); container.CreateIfNotExists(); - blobs = new ConcurrentDictionary(); + blobs = new ConcurrentDictionary(); this.blobName = blobName; this.deleteOnClose = deleteOnClose; } @@ -63,7 +151,7 @@ public override void Close() { foreach (var entry in blobs) { - entry.Value.Delete(); + entry.Value.GetPageBlob().Delete(); } } } @@ -75,9 +163,9 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) { for (int i = fromSegment; i < toSegment; i++) { - if (blobs.TryRemove(i, out CloudPageBlob blob)) + if (blobs.TryRemove(i, out BlobEntry blob)) { - blob.Delete(); + blob.GetPageBlob().Delete(); } } } @@ -88,13 +176,14 @@ public override void DeleteSegmentRange(int fromSegment, int toSegment) public override unsafe void ReadAsync(int 
segmentId, ulong sourceAddress, IntPtr destinationAddress, uint readLength, IOCompletionCallback callback, IAsyncResult asyncResult) { // It is up to the allocator to make sure no reads are issued to segments before they are written - if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) throw new InvalidOperationException("Attempting to read non-existent segments"); + if (!blobs.TryGetValue(segmentId, out BlobEntry blobEntry)) throw new InvalidOperationException("Attempting to read non-existent segments"); // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); NativeOverlapped* ovNative = ov.UnsafePack(callback, IntPtr.Zero); UnmanagedMemoryStream stream = new UnmanagedMemoryStream((byte*)destinationAddress, readLength, readLength, FileAccess.Write); + CloudPageBlob pageBlob = blobEntry.GetPageBlob(); pageBlob.BeginDownloadRangeToStream(stream, (Int64)sourceAddress, readLength, ar => { try { @@ -116,30 +205,39 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr /// public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { - if (!blobs.TryGetValue(segmentId, out CloudPageBlob pageBlob)) + if (!blobs.TryGetValue(segmentId, out BlobEntry blobEntry)) { - // If no blob exists for the segment, we must first create the segment asynchronouly. (Create call takes ~70 ms by measurement) - pageBlob = container.GetPageBlobReference(blobName + segmentId); - - // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because - // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of - // how large it can grow to. - var size = segmentSize == -1 ? 
MAX_BLOB_SIZE : segmentSize; - // It is up to allocator to ensure that no reads happen before the callback of this function is invoked. - pageBlob.BeginCreate(size, ar => + BlobEntry entry = new BlobEntry(); + if (blobs.TryAdd(segmentId, entry)) { - pageBlob.EndCreate(ar); - }, null); - WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); - blobs.TryAdd(segmentId, pageBlob); - } - else - { - // Write directly to the existing blob otherwise - WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + CloudPageBlob pageBlob = container.GetPageBlobReference(blobName + segmentId); + // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because + // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of + // how large it can grow to. + var size = segmentSize == -1 ? MAX_BLOB_SIZE : segmentSize; + // If no blob exists for the segment, we must first create the segment asynchronouly. (Create call takes ~70 ms by measurement) + // After creation is done, we can call write. + entry.CreateAsync(size, pageBlob); + } + // Otherwise, some other thread beat us to it. Okay to use their blobs. + blobEntry = blobs[segmentId]; } + TryWriteAsync(blobEntry, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void TryWriteAsync(BlobEntry blobEntry, IntPtr sourceAddress, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) + { + CloudPageBlob pageBlob = blobEntry.GetPageBlob(); + // If pageBlob is null, it is being created. 
Attempt to queue the write for the creator to complete after it is done + if (pageBlob == null + && blobEntry.TryQueueAction(p => WriteToBlobAsync(p, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult))) return; + + // Otherwise, invoke directly. + WriteToBlobAsync(pageBlob, sourceAddress, destinationAddress, numBytesToWrite, callback, asyncResult); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAddress, ulong destinationAddress, uint numBytesToWrite, IOCompletionCallback callback, IAsyncResult asyncResult) { // Even though Azure Page Blob does not make use of Overlapped, we populate one to conform to the callback API @@ -164,6 +262,4 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd }, asyncResult); } } - - } From f984cc47c0d868f1ff351acbcffc4ffa22d323a0 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 16 Jul 2019 15:42:58 -0700 Subject: [PATCH 47/56] Revert debug code --- cs/src/cloud/AzurePageBlobDevice.cs | 14 +++++++++++--- cs/src/core/Allocator/AllocatorBase.cs | 6 +++--- cs/src/core/Allocator/GenericAllocator.cs | 6 +++--- cs/test/BasicDiskFASTERTests.cs | 7 ++----- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/cloud/AzurePageBlobDevice.cs index 8f16f0691..dccfa0644 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/cloud/AzurePageBlobDevice.cs @@ -62,8 +62,15 @@ public void CreateAsync(long size, CloudPageBlob pageBlob) // Asynchronously create the blob pageBlob.BeginCreate(size, ar => { - // TODO(Tianyu): Need to wrap this for exceptions? - pageBlob.EndCreate(ar); + try + { + pageBlob.EndCreate(ar); + } + catch (Exception e) + { + // TODO(Tianyu): Can't really do better without knowing error behavior + Trace.TraceError(e.Message); + } // At this point the blob is fully created. 
After this line all consequent writers will write immediately. We just // need to clear the queue of pending writers. this.pageBlob = pageBlob; @@ -192,6 +199,7 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, IntPtr // I don't think I can be more specific in catch here because no documentation on exception behavior is provided catch (Exception e) { + Trace.TraceError(e.Message); // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. callback(2, readLength, ovNative); @@ -253,7 +261,7 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd // I don't think I can be more specific in catch here because no documentation on exception behavior is provided catch (Exception e) { - Console.WriteLine(e.Message); + Trace.TraceError(e.Message); // Is there any documentation on the meaning of error codes here? The handler suggests that any non-zero value is an error // but does not distinguish between them. 
callback(1, numBytesToWrite, ovNative); diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index c2cf8dba8..558ac6f52 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -1473,7 +1473,7 @@ private void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, NativeOverl { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } var result = (AsyncGetFromDiskResult>)Overlapped.Unpack(overlap).AsyncResult; @@ -1532,7 +1532,7 @@ private void AsyncFlushPageCallback(uint errorCode, uint numBytes, NativeOverlap { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -1575,7 +1575,7 @@ private void AsyncFlushPageToDeviceCallback(uint errorCode, uint numBytes, Nativ { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } PageAsyncFlushResult result = (PageAsyncFlushResult)Overlapped.Unpack(overlap).AsyncResult; diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index b6e136c3a..04b4fbb37 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -445,7 +445,7 @@ private void AsyncReadPageCallback(uint errorCode, uint numBytes, NativeOverlapp { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -494,7 +494,7 @@ private void 
AsyncFlushPartialObjectLogCallback(uint errorCode, uint n { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } // Set the page status to flushed @@ -508,7 +508,7 @@ private void AsyncReadPageWithObjectsCallback(uint errorCode, uint num { if (errorCode != 0) { - Console.WriteLine("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } PageAsyncReadResult result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index 049020596..d351bd6d1 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -37,7 +37,7 @@ public TestContext TestContext set { testContextInstance = value; } } - void TestDeviceWriteRead(IDevice log, bool debug = false) + void TestDeviceWriteRead(IDevice log) { this.log = log; fht = new FasterKV @@ -48,13 +48,11 @@ void TestDeviceWriteRead(IDevice log, bool debug = false) for (int i = 0; i < 2000; i++) { - if (debug && i % 500 == 0) TestContext.WriteLine("inserted {0} tuples", i); var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; fht.Upsert(ref key1, ref value, Empty.Default, 0); } fht.CompletePending(true); - if (debug) TestContext.WriteLine("Write pending cleared"); // Update first 100 using RMW from storage for (int i = 0; i < 100; i++) @@ -64,7 +62,6 @@ void TestDeviceWriteRead(IDevice log, bool debug = false) var status = fht.RMW(ref key1, ref input, Empty.Default, 0); if (status == Status.PENDING) fht.CompletePending(true); - if (debug && i % 10 == 0) TestContext.WriteLine("Modified {0} tuples\n", i); } @@ -112,7 +109,7 @@ public void NativeDiskWriteRead() [Test] public void PageBlobWriteRead() { - 
TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false), true); + TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); } } } From ecfb8de0ca9bdfb84e980650031b9a1345a641fb Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 17 Jul 2019 16:25:30 -0700 Subject: [PATCH 48/56] Cleanup and condition test execution --- cs/FASTER.sln | 26 ++--- cs/src/core/FASTER.core.csproj | 14 --- .../AzureStorageDevice/AzureStorageDevice.cs} | 104 +---------------- .../devices/AzureStorageDevice/BlobEntry.cs | 106 ++++++++++++++++++ .../FASTER.devices.AzureStorageDevice.csproj} | 12 +- .../FASTER.devices.AzureStorageDevice.nuspec | 34 ++++++ cs/test/BasicDiskFASTERTests.cs | 44 ++++---- cs/test/FASTER.test.csproj | 2 +- 8 files changed, 182 insertions(+), 160 deletions(-) rename cs/src/{cloud/AzurePageBlobDevice.cs => devices/AzureStorageDevice/AzureStorageDevice.cs} (62%) create mode 100644 cs/src/devices/AzureStorageDevice/BlobEntry.cs rename cs/src/{cloud/FASTER.cloud.csproj => devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj} (76%) create mode 100644 cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec diff --git a/cs/FASTER.sln b/cs/FASTER.sln index e52a2c076..fd956807d 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27004.2008 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29102.190 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.benchmark", "benchmark\FASTER.benchmark.csproj", "{33A732D1-2B58-4FEE-9696-B9483496229F}" EndProject @@ -38,9 +38,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ClassCacheMT", "playground\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VarLenStructSample", 
"playground\VarLenStructSample\VarLenStructSample.csproj", "{37B3C501-A7A1-4E86-B766-22F9BEF31DFE}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "cloud", "cloud", "{A6B14415-D316-4955-BE5F-725BB2DEBEBE}" +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "devices", "devices", "{A6B14415-D316-4955-BE5F-725BB2DEBEBE}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.cloud", "src\cloud\FASTER.cloud.csproj", "{ECF8EE9C-0D02-4EB3-8A25-6A318719F029}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.devices.AzureStorageDevice", "src\devices\AzureStorageDevice\FASTER.devices.AzureStorageDevice.csproj", "{E571E686-01A0-44D5-BFF5-B7678284258B}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -138,14 +138,14 @@ Global {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|Any CPU.Build.0 = Release|x64 {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|x64.ActiveCfg = Release|x64 {37B3C501-A7A1-4E86-B766-22F9BEF31DFE}.Release|x64.Build.0 = Release|x64 - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|Any CPU.Build.0 = Debug|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|x64.ActiveCfg = Debug|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Debug|x64.Build.0 = Debug|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|Any CPU.ActiveCfg = Release|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|Any CPU.Build.0 = Release|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|x64.ActiveCfg = Release|Any CPU - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029}.Release|x64.Build.0 = Release|Any CPU + {E571E686-01A0-44D5-BFF5-B7678284258B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E571E686-01A0-44D5-BFF5-B7678284258B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E571E686-01A0-44D5-BFF5-B7678284258B}.Debug|x64.ActiveCfg = Debug|x64 + {E571E686-01A0-44D5-BFF5-B7678284258B}.Debug|x64.Build.0 = Debug|x64 + 
{E571E686-01A0-44D5-BFF5-B7678284258B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|Any CPU.Build.0 = Release|Any CPU + {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|x64.ActiveCfg = Release|x64 + {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -164,7 +164,7 @@ Global {F989FF23-5DD7-4D8F-9458-BDA22EFC038D} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {37B3C501-A7A1-4E86-B766-22F9BEF31DFE} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {A6B14415-D316-4955-BE5F-725BB2DEBEBE} = {28800357-C8CE-4CD0-A2AD-D4A910ABB496} - {ECF8EE9C-0D02-4EB3-8A25-6A318719F029} = {A6B14415-D316-4955-BE5F-725BB2DEBEBE} + {E571E686-01A0-44D5-BFF5-B7678284258B} = {A6B14415-D316-4955-BE5F-725BB2DEBEBE} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A0750637-2CCB-4139-B25E-F2CE740DCFAC} diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index c2dbeca27..1d68a5425 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -37,18 +37,4 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/cs/src/cloud/AzurePageBlobDevice.cs b/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs similarity index 62% rename from cs/src/cloud/AzurePageBlobDevice.cs rename to cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs index dccfa0644..cf3491e57 100644 --- a/cs/src/cloud/AzurePageBlobDevice.cs +++ b/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs @@ -11,108 +11,14 @@ using Microsoft.Azure.Storage; using Microsoft.Azure.Storage.Blob; -namespace FASTER.cloud +namespace FASTER.devices { /// /// A IDevice Implementation that is backed byAzure Page Blob. - /// This device is expected to be an order of magnitude slower than local SSD or HDD, but provide scalability and shared access in the cloud. 
+ /// This device is slower than a local SSD or HDD, but provides scalability and shared access in the cloud. /// - public class AzurePageBlobDevice : StorageDeviceBase + public class AzureStorageDevice : StorageDeviceBase { - // This class bundles a page blob object with a queue and a counter to ensure - // 1) BeginCreate is not called more than once - // 2) No writes are issued before EndCreate - // The creator of a BlobEntry is responsible for populating the object with an underlying Page Blob. Any subsequent callers - // either directly write to the created page blob, or queues the write so the creator can clear it after creation is complete. - // In-progress creation is denoted by a null value on the underlying page blob - private class BlobEntry - { - private CloudPageBlob pageBlob; - private ConcurrentQueue> pendingWrites; - private int waitingCount; - - /// - /// Creates a new BlobEntry, does not initialize a page blob. Use - /// for actual creation. - /// - public BlobEntry() - { - pageBlob = null; - pendingWrites = new ConcurrentQueue>(); - waitingCount = 0; - } - - /// - /// Getter for the underlying - /// - /// the underlying , or null if there is none - public CloudPageBlob GetPageBlob() - { - return pageBlob; - } - - /// - /// Asynchronously invoke create on the given pageBlob. - /// - /// maximum size of the blob - /// The page blob to create - public void CreateAsync(long size, CloudPageBlob pageBlob) - { - Debug.Assert(waitingCount == 0, "Create should be called on blobs that don't already exist and exactly once"); - // Asynchronously create the blob - pageBlob.BeginCreate(size, ar => - { - try - { - pageBlob.EndCreate(ar); - } - catch (Exception e) - { - // TODO(Tianyu): Can't really do better without knowing error behavior - Trace.TraceError(e.Message); - } - // At this point the blob is fully created. After this line all consequent writers will write immediately. We just - // need to clear the queue of pending writers. 
- this.pageBlob = pageBlob; - // Take a snapshot of the current waiting count. Exactly this many actions will be cleared. - // Swapping in -1 will inform any stragglers that we are not taking their actions and prompt them to retry (and call write directly) - int waitingCountSnapshot = Interlocked.Exchange(ref waitingCount, -1); - Action action; - // Clear actions - for (int i = 0; i < waitingCountSnapshot; i++) - { - // inserts into the queue may lag behind the creation thread. We have to wait until that happens. - // This is so rare, that we are probably okay with a busy wait. - while (!pendingWrites.TryDequeue(out action)) { } - action(pageBlob); - } - // Mark for deallocation for the GC - pendingWrites = null; - }, null); - } - - /// - /// Attempts to enqueue an action to be invoked by the creator after creation is done. Should only be invoked when - /// creation is in-flight. This call is allowed to fail (and return false) if concurrently the creation is complete. - /// The caller should call the write action directly instead of queueing in this case. - /// - /// The write action to perform - /// Whether the action was successfully enqueued - public bool TryQueueAction(Action writeAction) - { - int currentCount; - do - { - currentCount = waitingCount; - // If current count became -1, creation is complete. New queue entries will not be processed and we must call the action ourselves. - if (currentCount == -1) return false; - } while (Interlocked.CompareExchange(ref waitingCount, currentCount + 1, currentCount) != currentCount); - // Enqueue last. The creation thread is obliged to wait until it has processed waitingCount many actions. - // It is extremely unlikely that we will get scheduled out here anyways. 
- pendingWrites.Enqueue(writeAction); - return true; - } - } private CloudBlobContainer container; private readonly ConcurrentDictionary blobs; private readonly string blobName; @@ -124,7 +30,7 @@ public bool TryQueueAction(Action writeAction) private const uint PAGE_BLOB_SECTOR_SIZE = 512; /// - /// Constructs a new AzurePageBlobDevice instance + /// Constructs a new AzureStorageDevice instance, backed by Azure Page Blobs /// /// The connection string to use when estblishing connection to Azure Blobs /// Name of the Azure Blob container to use. If there does not exist a container with the supplied name, one is created @@ -133,7 +39,7 @@ public bool TryQueueAction(Action writeAction) /// True if the program should delete all blobs created on call to Close. False otherwise. /// The container is not deleted even if it was created in this constructor /// - public AzurePageBlobDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) + public AzureStorageDevice(string connectionString, string containerName, string blobName, bool deleteOnClose = false) : base(connectionString + "/" + containerName + "/" + blobName, PAGE_BLOB_SECTOR_SIZE) { CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString); diff --git a/cs/src/devices/AzureStorageDevice/BlobEntry.cs b/cs/src/devices/AzureStorageDevice/BlobEntry.cs new file mode 100644 index 000000000..e70d20b6d --- /dev/null +++ b/cs/src/devices/AzureStorageDevice/BlobEntry.cs @@ -0,0 +1,106 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System; +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Threading; +using Microsoft.Azure.Storage.Blob; + +namespace FASTER.devices +{ + // This class bundles a page blob object with a queue and a counter to ensure + // 1) BeginCreate is not called more than once + // 2) No writes are issued before EndCreate + // The creator of a BlobEntry is responsible for populating the object with an underlying Page Blob. Any subsequent callers + // either directly write to the created page blob, or queues the write so the creator can clear it after creation is complete. + // In-progress creation is denoted by a null value on the underlying page blob + class BlobEntry + { + private CloudPageBlob pageBlob; + private ConcurrentQueue> pendingWrites; + private int waitingCount; + + /// + /// Creates a new BlobEntry, does not initialize a page blob. Use + /// for actual creation. + /// + public BlobEntry() + { + pageBlob = null; + pendingWrites = new ConcurrentQueue>(); + waitingCount = 0; + } + + /// + /// Getter for the underlying + /// + /// the underlying , or null if there is none + public CloudPageBlob GetPageBlob() + { + return pageBlob; + } + + /// + /// Asynchronously invoke create on the given pageBlob. + /// + /// maximum size of the blob + /// The page blob to create + public void CreateAsync(long size, CloudPageBlob pageBlob) + { + Debug.Assert(waitingCount == 0, "Create should be called on blobs that don't already exist and exactly once"); + // Asynchronously create the blob + pageBlob.BeginCreate(size, ar => + { + try + { + pageBlob.EndCreate(ar); + } + catch (Exception e) + { + // TODO(Tianyu): Can't really do better without knowing error behavior + Trace.TraceError(e.Message); + } + // At this point the blob is fully created. After this line all consequent writers will write immediately. We just + // need to clear the queue of pending writers. + this.pageBlob = pageBlob; + // Take a snapshot of the current waiting count. 
Exactly this many actions will be cleared. + // Swapping in -1 will inform any stragglers that we are not taking their actions and prompt them to retry (and call write directly) + int waitingCountSnapshot = Interlocked.Exchange(ref waitingCount, -1); + Action action; + // Clear actions + for (int i = 0; i < waitingCountSnapshot; i++) + { + // inserts into the queue may lag behind the creation thread. We have to wait until that happens. + // This is so rare, that we are probably okay with a busy wait. + while (!pendingWrites.TryDequeue(out action)) { } + action(pageBlob); + } + // Mark for deallocation for the GC + pendingWrites = null; + }, null); + } + + /// + /// Attempts to enqueue an action to be invoked by the creator after creation is done. Should only be invoked when + /// creation is in-flight. This call is allowed to fail (and return false) if concurrently the creation is complete. + /// The caller should call the write action directly instead of queueing in this case. + /// + /// The write action to perform + /// Whether the action was successfully enqueued + public bool TryQueueAction(Action writeAction) + { + int currentCount; + do + { + currentCount = waitingCount; + // If current count became -1, creation is complete. New queue entries will not be processed and we must call the action ourselves. + if (currentCount == -1) return false; + } while (Interlocked.CompareExchange(ref waitingCount, currentCount + 1, currentCount) != currentCount); + // Enqueue last. The creation thread is obliged to wait until it has processed waitingCount many actions. + // It is extremely unlikely that we will get scheduled out here anyways. 
+ pendingWrites.Enqueue(writeAction); + return true; + } + } +} diff --git a/cs/src/cloud/FASTER.cloud.csproj b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj similarity index 76% rename from cs/src/cloud/FASTER.cloud.csproj rename to cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj index 393e979ab..de06d0e90 100644 --- a/cs/src/cloud/FASTER.cloud.csproj +++ b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj @@ -7,14 +7,14 @@ true - FASTER.cloud - FASTER.cloud + FASTER.devices + FASTER.devices.AzureStorageDevice prompt true Library - ../../FASTER.snk + ../../../FASTER.snk false bin\$(Platform)\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml @@ -36,13 +36,9 @@ - - - - - + \ No newline at end of file diff --git a/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec new file mode 100644 index 000000000..0694ac4c4 --- /dev/null +++ b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec @@ -0,0 +1,34 @@ + + + + FASTER.devices.AzureStorageDevice + $version$ + FASTER.devices.AzureStorageDevice + Microsoft + Microsoft + https://github.com/Microsoft/FASTER + https://raw.githubusercontent.com/Microsoft/FASTER/master/LICENSE + true + IDevice FASTER is a fast concurrent key-value store that also supports indexing of larger-than-memory data. This is a FASTER IDevice implementation for Azure Storage. + See the project website at https://github.com/Microsoft/FASTER for more details + © Microsoft Corporation. All rights reserved. 
+ en-US + key-value store dictionary hashtable concurrent log persistent azure storage FASTER + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index d351bd6d1..d14c291a9 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -10,31 +10,30 @@ using FASTER.core; using System.IO; using NUnit.Framework; -using FASTER.cloud; +using FASTER.devices; using System.Diagnostics; namespace FASTER.test { - - - // TODO(Tianyu): Now that we are also testing device with Azure Page Blobs here, should we also rename the test? [TestFixture] - internal class BasicDiskFASTERTests + internal class BasicStorageFASTERTests { - private TestContext testContextInstance; private FasterKV fht; private IDevice log; public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; public const string TEST_CONTAINER = "test"; - /// - /// Gets or sets the test context which provides - /// information about and functionality for the current test run. 
- /// - public TestContext TestContext + [Test] + public void LocalStorageWriteRead() + { + TestDeviceWriteRead(Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\BasicDiskFASTERTests.log", deleteOnClose: true)); + } + + [Test] + public void PageBlobWriteRead() { - get { return testContextInstance; } - set { testContextInstance = value; } + if (AzureStorageEmulator.IsProcessStarted()) + TestDeviceWriteRead(new AzureStorageDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); } void TestDeviceWriteRead(IDevice log) @@ -89,27 +88,22 @@ void TestDeviceWriteRead(IDevice log) } } } - } - [TearDown] - public void TearDown() - { fht.StopSession(); fht.Dispose(); fht = null; log.Close(); } + } - [Test] - public void NativeDiskWriteRead() - { - TestDeviceWriteRead(Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\BasicDiskFASTERTests.log", deleteOnClose: true)); - } + internal static class AzureStorageEmulator + { + private const string _win7ProcessName = "WAStorageEmulator"; + private const string _win8ProcessName = "WASTOR~1"; - [Test] - public void PageBlobWriteRead() + public static bool IsProcessStarted() { - TestDeviceWriteRead(new AzurePageBlobDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); + return null != (Process.GetProcessesByName(_win7ProcessName).FirstOrDefault() ?? 
Process.GetProcessesByName(_win8ProcessName).FirstOrDefault()); } } } diff --git a/cs/test/FASTER.test.csproj b/cs/test/FASTER.test.csproj index 0a4d9cec3..41d0226cc 100644 --- a/cs/test/FASTER.test.csproj +++ b/cs/test/FASTER.test.csproj @@ -43,7 +43,7 @@ - + From d9c529f234812acd5565a1532439cfa8235479b1 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 17 Jul 2019 16:45:35 -0700 Subject: [PATCH 49/56] minor fix --- cs/test/BasicDiskFASTERTests.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index d14c291a9..8299cac2e 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -19,7 +19,6 @@ namespace FASTER.test internal class BasicStorageFASTERTests { private FasterKV fht; - private IDevice log; public const string EMULATED_STORAGE_STRING = "UseDevelopmentStorage=true;"; public const string TEST_CONTAINER = "test"; @@ -38,7 +37,6 @@ public void PageBlobWriteRead() void TestDeviceWriteRead(IDevice log) { - this.log = log; fht = new FasterKV (1L << 20, new Functions(), new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10 }); fht.StartSession(); From e7e74063f57658ecb428e1ba2e6ede741bc2b299 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 17 Jul 2019 17:00:29 -0700 Subject: [PATCH 50/56] dispose event handles --- cs/src/core/Device/StorageDeviceBase.cs | 22 +++++++++++++++------- cs/src/core/Device/TieredStorageDevice.cs | 13 +++++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index bc5b8484e..4a0955903 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -170,7 +170,11 @@ public virtual void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callb for (int i = oldStart; i < toSegment; i++) { RemoveSegmentAsync(i, r => { - if (countdown.Signal()) callback(r); + if 
(countdown.Signal()) + { + callback(r); + countdown.Dispose(); + } }, result); } }); @@ -178,9 +182,11 @@ public virtual void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callb public virtual void TruncateUntilSegment(int toSegment) { - ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); - TruncateUntilSegmentAsync(toSegment, r => completionEvent.Set(), null); - completionEvent.Wait(); + using (ManualResetEventSlim completionEvent = new ManualResetEventSlim(false)) + { + TruncateUntilSegmentAsync(toSegment, r => completionEvent.Set(), null); + completionEvent.Wait(); + } } public virtual void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result) @@ -191,9 +197,11 @@ public virtual void TruncateUntilAddressAsync(long toAddress, AsyncCallback call public virtual void TruncateUntilAddress(long toAddress) { - ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); - TruncateUntilAddressAsync(toAddress, r => completionEvent.Set(), null); - completionEvent.Wait(); + using (ManualResetEventSlim completionEvent = new ManualResetEventSlim(false)) + { + TruncateUntilAddressAsync(toAddress, r => completionEvent.Set(), null); + completionEvent.Wait(); + } } /// diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index fceb66ff2..e8a5e7e6d 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -98,7 +98,12 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, int segmentId, ulon devices[i].WriteAsync(sourceAddress, segmentId, destinationAddress, numBytesToWrite, (e, n, o) => { // The last tier to finish invokes the callback - if (countdown.Signal()) callback(e, n, o); + if (countdown.Signal()) + { + callback(e, n, o); + countdown.Dispose(); + } + }, asyncResult); } else @@ -117,7 +122,11 @@ public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAs { 
devices[i].RemoveSegmentAsync(segment, r => { - if (countdown.Signal()) callback(r); + if (countdown.Signal()) + { + callback(r); + countdown.Dispose(); + } }, result); } } From 4f248e1b1240472ecc1884dc5d4db127e9effb45 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 17 Jul 2019 17:24:46 -0700 Subject: [PATCH 51/56] Use an env var for azure tests. --- azure-pipelines.yml | 1 + cs/test/BasicDiskFASTERTests.cs | 15 +++------------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2237f2472..f7e119a9a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,5 +1,6 @@ variables: solution: 'cs/FASTER.sln' + RunAzureTests: 'yes' jobs: - job: 'csharpWindows' diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index 8299cac2e..14b754261 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -31,8 +31,10 @@ public void LocalStorageWriteRead() [Test] public void PageBlobWriteRead() { - if (AzureStorageEmulator.IsProcessStarted()) + if ("yes".Equals(Environment.GetEnvironmentVariable("RunAzureTests"))) TestDeviceWriteRead(new AzureStorageDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); + else + throw new NotImplementedException(); } void TestDeviceWriteRead(IDevice log) @@ -93,15 +95,4 @@ void TestDeviceWriteRead(IDevice log) log.Close(); } } - - internal static class AzureStorageEmulator - { - private const string _win7ProcessName = "WAStorageEmulator"; - private const string _win8ProcessName = "WASTOR~1"; - - public static bool IsProcessStarted() - { - return null != (Process.GetProcessesByName(_win7ProcessName).FirstOrDefault() ?? 
Process.GetProcessesByName(_win8ProcessName).FirstOrDefault()); - } - } } From 744e442d189cef57b83f0010e53f8b74cac3ba73 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 17 Jul 2019 17:36:41 -0700 Subject: [PATCH 52/56] Revert debug code --- cs/test/BasicDiskFASTERTests.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index 14b754261..0d8ad497f 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -33,8 +33,6 @@ public void PageBlobWriteRead() { if ("yes".Equals(Environment.GetEnvironmentVariable("RunAzureTests"))) TestDeviceWriteRead(new AzureStorageDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); - else - throw new NotImplementedException(); } void TestDeviceWriteRead(IDevice log) From fbd544bd07598b8d9fdd5d97a44680320d3d6b9e Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 23 Jul 2019 14:37:06 -0700 Subject: [PATCH 53/56] fix merge problem --- cs/src/core/Device/Devices.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 01b5f3b1e..14a975445 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -43,7 +43,7 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru else #endif { - logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity); + logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity: capacity); } return logDevice; } From 49d31e1d3702b36a9612fe8822d6e32f6d247a68 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 23 Jul 2019 16:25:54 -0700 Subject: [PATCH 54/56] Add recovery of segment range for base (non-composite) devices --- cs/src/core/Allocator/AllocatorBase.cs | 4 ++ cs/src/core/Device/IDevice.cs | 47 +++++++++++++++ cs/src/core/Device/LocalStorageDevice.cs | 37 ++++++++++++ 
.../core/Device/ManagedLocalStorageDevice.cs | 38 ++++++++++++ cs/src/core/Device/NullDevice.cs | 12 +++- cs/src/core/Device/StorageDeviceBase.cs | 59 +++++++++++++++++-- cs/src/core/Device/TieredStorageDevice.cs | 8 ++- .../AzureStorageDevice/AzureStorageDevice.cs | 44 +++++++++++++- .../devices/AzureStorageDevice/BlobEntry.cs | 19 ++++-- cs/test/BasicDiskFASTERTests.cs | 2 +- 10 files changed, 255 insertions(+), 15 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index da34ab145..8f84b7301 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -917,6 +917,10 @@ public void ShiftBeginAddress(long newBeginAddress) }); } + /// + /// Wraps when an allocator potentially has to interact with multiple devices + /// + /// protected virtual void TruncateUntilAddress(long toAddress) { device.TruncateUntilAddress(toAddress); diff --git a/cs/src/core/Device/IDevice.cs b/cs/src/core/Device/IDevice.cs index 5ea7c816e..8833a71b1 100644 --- a/cs/src/core/Device/IDevice.cs +++ b/cs/src/core/Device/IDevice.cs @@ -27,10 +27,22 @@ public interface IDevice /// long Capacity { get; } + /// + /// A device breaks up each logical log into multiple self-contained segments that are of the same size. + /// It is an atomic unit of data that cannot be partially present on a device (i.e. either the entire segment + /// is present or no data from the segment is present). Examples of this include files or named blobs. This + /// property returns the size of each segment. 
+ /// long SegmentSize { get; } + /// + /// The index of the first segment present on this device + /// int StartSegment { get; } + /// + /// The index of the last segment present on this device + /// int EndSegment { get; } /// @@ -92,16 +104,51 @@ public interface IDevice /// void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddress, uint aligned_read_length, IOCompletionCallback callback, IAsyncResult asyncResult); + /// + /// Truncates the log until the given address. The truncated portion should no longer be accessed as the device is no longer responsible for + /// its maintenance, but physical deletion may not happen immediately. + /// + /// upper bound of truncated address + /// callback to invoke when truncation is complete + /// result to be passed to the callback void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result); + /// + /// Truncates the log until the given address. The truncated portion should no longer be accessed as the device is no longer responsible for + /// its maintenance, but physical deletion may not happen immediately. This version of the function can block. + /// + /// upper bound of truncated address void TruncateUntilAddress(long toAddress); + /// + /// Truncates the log until the given segment. Physical deletion of the given segments are guaranteed to have happened when the callback is invoked. + /// + /// the largest (in index) segment to truncate + /// callback to invoke when truncation is complete + /// result to be passed to the callback void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callback, IAsyncResult result); + /// + /// Truncates the log until the given segment. Physical deletion of the given segments are guaranteed to have happened when the function returns. + /// This version of the function can block. + /// + /// the largest (in index) segment to truncate void TruncateUntilSegment(int toSegment); + /// + /// Removes a single segment from the device. 
This function should not normally be called. + /// Instead, use + /// + /// index of the segment to remove + /// callback to invoke when removal is complete + /// result to be passed to the callback void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result); + /// + /// Removes a single segment from the device. This function should not normally be called. + /// Instead, use + /// + /// index of the segment to remove void RemoveSegment(int segment); /* Close */ diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index 1363c27c7..9818a9679 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -42,6 +42,33 @@ public LocalStorageDevice(string filename, this.deleteOnClose = deleteOnClose; this.disableFileBuffering = disableFileBuffering; logHandles = new SafeConcurrentDictionary(); + RecoverFiles(); + } + + private void RecoverFiles() + { + string[] comps = FileName.Split(Path.DirectorySeparatorChar); + string bareName = comps[comps.Length - 1]; + string directory = System.IO.Path.GetDirectoryName(FileName); + DirectoryInfo di = new DirectoryInfo(directory); + if (!di.Exists) return; + int prevSegmentId = -1; + foreach (FileInfo item in di.GetFiles(bareName + "*")) + { + // TODO(Tianyu): Depending on string parsing is bad. But what can one do when an entire cloud service API has no doc? + int segmentId = Int32.Parse(item.Name.Replace(bareName, "").Replace(".", "")); + if (segmentId != prevSegmentId + 1) + { + startSegment = segmentId; + + } + else + { + endSegment = segmentId; + } + prevSegmentId = segmentId; + } + // No need to populate map because logHandles use Open or create on files.
} /// @@ -125,6 +152,10 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, } } + /// + /// + /// + /// public override void RemoveSegment(int segment) { if (logHandles.TryRemove(segment, out SafeFileHandle logHandle)) @@ -134,6 +165,12 @@ public override void RemoveSegment(int segment) } } + /// + /// + /// + /// + /// + /// public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) { RemoveSegment(segment); diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 2a2564fd4..c17c6ed38 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -36,9 +36,37 @@ public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, this.preallocateFile = preallocateFile; this.deleteOnClose = deleteOnClose; logHandles = new ConcurrentDictionary(); + RecoverFiles(); } + private void RecoverFiles() + { + string directory = System.IO.Path.GetDirectoryName(FileName); + DirectoryInfo di = new DirectoryInfo(directory); + int prevSegmentId = -1; + foreach (FileInfo item in di.GetFiles(FileName + "*")) + { + Console.WriteLine(FileName); + // TODO(Tianyu): Depending on string parsing is bad. But what can one do when an entire cloud service API has no doc? + int segmentId = Int32.Parse(item.Name.Replace(FileName, "").Replace(".", "")); + Console.WriteLine(segmentId); + if (segmentId != prevSegmentId + 1) + { + startSegment = segmentId; + + } + else + { + endSegment = segmentId; + } + prevSegmentId = segmentId; + } + // No need to populate map because logHandles use Open or create on files. 
+ } + + + class ReadCallbackWrapper { readonly IOCompletionCallback callback; @@ -139,6 +167,10 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, new WriteCallbackWrapper(callback, asyncResult, memory).Callback, null); } + /// + /// + /// + /// public override void RemoveSegment(int segment) { if (logHandles.TryRemove(segment, out Stream logHandle)) @@ -148,6 +180,12 @@ public override void RemoveSegment(int segment) } } + /// + /// + /// + /// + /// + /// public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) { RemoveSegment(segment); diff --git a/cs/src/core/Device/NullDevice.cs b/cs/src/core/Device/NullDevice.cs index 30412d5ec..c8cca2cb4 100644 --- a/cs/src/core/Device/NullDevice.cs +++ b/cs/src/core/Device/NullDevice.cs @@ -61,15 +61,25 @@ public override unsafe void WriteAsync(IntPtr alignedSourceAddress, int segmentI callback(0, numBytesToWrite, ov_native); } + /// + /// + /// + /// public override void RemoveSegment(int segment) { // No-op } + /// + /// + /// + /// + /// + /// public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) => callback(result); /// - /// + /// /// public override void Close() { diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 4a0955903..f89b3a1f3 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -30,15 +30,23 @@ public abstract class StorageDeviceBase : IDevice public string FileName { get; } /// - /// Returns the maximum capacity of the storage device, in number of bytes. - /// If returned -1, the storage device has no capacity limit. 
+ /// /// public long Capacity { get; } + /// + /// + /// public int StartSegment { get { return startSegment; } } + /// + /// + /// public int EndSegment { get { return endSegment; } } + /// + /// + /// public long SegmentSize { get { return segmentSize; } } /// @@ -49,10 +57,17 @@ public abstract class StorageDeviceBase : IDevice private int segmentSizeBits; private ulong segmentSizeMask; - // A device may have internal in-memory data structure that requires epoch protection under concurrent access. + /// + /// Instance of the epoch protection framework in the current system. + /// A device may have internal in-memory data structure that requires epoch protection under concurrent access. + /// protected LightEpoch epoch; - private int startSegment, endSegment; + /// + /// start and end segment corresponding to and . Subclasses are + /// allowed to modify these as needed. + /// + protected int startSegment = 0, endSegment = -1; /// /// Initializes a new StorageDeviceBase @@ -76,6 +91,7 @@ public StorageDeviceBase(string filename, uint sectorSize, long capacity) /// Initialize device /// /// + /// public virtual void Initialize(long segmentSize, LightEpoch epoch = null) { // TODO(Tianyu): Alternatively, we can adjust capacity based on the segment size: given a phsyical upper limit of capacity, @@ -143,8 +159,19 @@ public void ReadAsync(ulong alignedSourceAddress, IntPtr alignedDestinationAddre aligned_read_length, callback, asyncResult); } + /// + /// + /// + /// + /// + /// public abstract void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result); + /// + /// + /// By default the implementation calls into + /// + /// public virtual void RemoveSegment(int segment) { ManualResetEventSlim completionEvent = new ManualResetEventSlim(false); @@ -152,7 +179,13 @@ public virtual void RemoveSegment(int segment) completionEvent.Wait(); } - public virtual void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callback, IAsyncResult result) + /// 
+ /// + /// + /// + /// + /// + public void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callback, IAsyncResult result) { // Reset begin range to at least toAddress if (!Utility.MonotonicUpdate(ref startSegment, toSegment, out int oldStart)) @@ -180,7 +213,11 @@ public virtual void TruncateUntilSegmentAsync(int toSegment, AsyncCallback callb }); } - public virtual void TruncateUntilSegment(int toSegment) + /// + /// + /// + /// + public void TruncateUntilSegment(int toSegment) { using (ManualResetEventSlim completionEvent = new ManualResetEventSlim(false)) { @@ -189,12 +226,22 @@ public virtual void TruncateUntilSegment(int toSegment) } } + /// + /// + /// + /// + /// + /// public virtual void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result) { // Truncate only up to segment boundary if address is not aligned TruncateUntilSegmentAsync((int)toAddress >> segmentSizeBits, callback, result); } + /// + /// + /// + /// public virtual void TruncateUntilAddress(long toAddress) { using (ManualResetEventSlim completionEvent = new ManualResetEventSlim(false)) diff --git a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index e8a5e7e6d..b1ea4af89 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -8,6 +8,12 @@ namespace FASTER.core { + /// + /// A logically composes multiple into a single storage device. It is assumed + /// that some are used as caches while there is one that is considered the commit point, i.e. when a write is completed + /// on the device, it is considered persistent. Reads are served from the closest device with available data. Writes are issued in parallel to + /// all devices + /// class TieredStorageDevice : StorageDeviceBase { private readonly IList devices; @@ -19,7 +25,7 @@ class TieredStorageDevice : StorageDeviceBase /// /// /// The index of an IDevice in . 
When a write has been completed on the device, - /// the write is considered persistent. It is guaranteed that the callback in + /// the write is considered persistent. It is guaranteed that the callback in /// will not be called until the write is completed on the commit point device. /// /// diff --git a/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs b/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs index 505749998..8c874fb62 100644 --- a/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs +++ b/cs/src/devices/AzureStorageDevice/AzureStorageDevice.cs @@ -50,6 +50,34 @@ public AzureStorageDevice(string connectionString, string containerName, string blobs = new ConcurrentDictionary(); this.blobName = blobName; this.deleteOnClose = deleteOnClose; + RecoverBlobs(); + } + + private void RecoverBlobs() + { + int prevSegmentId = -1; + foreach (IListBlobItem item in container.ListBlobs(blobName)) + { + // TODO(Tianyu): Depending on string parsing is bad. But what can one do when an entire cloud service API has no doc? 
+ string[] parts = item.Uri.Segments; + int segmentId = Int32.Parse(parts[parts.Length - 1].Replace(blobName, "")); + if (segmentId != prevSegmentId + 1) + { + startSegment = segmentId; + + } + else + { + endSegment = segmentId; + } + prevSegmentId = segmentId; + } + + for (int i = startSegment; i <= endSegment; i++) + { + bool ret = blobs.TryAdd(i, new BlobEntry(container.GetPageBlobReference(GetSegmentBlobName(i)))); + Debug.Assert(ret, "Recovery of blobs is single-threaded and should not yield any failure due to concurrency"); + } } /// @@ -69,6 +97,13 @@ public override void Close() } } } + + /// + /// + /// + /// + /// + /// public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAsyncResult result) { if (blobs.TryRemove(segment, out BlobEntry blob)) @@ -81,7 +116,7 @@ public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAs pageBlob.EndDelete(ar); } - catch (Exception e) + catch (Exception) { // Can I do anything else other than printing out an error message? } @@ -131,7 +166,7 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti BlobEntry entry = new BlobEntry(); if (blobs.TryAdd(segmentId, entry)) { - CloudPageBlob pageBlob = container.GetPageBlobReference(blobName + segmentId); + CloudPageBlob pageBlob = container.GetPageBlobReference(GetSegmentBlobName(segmentId)); // If segment size is -1, which denotes absence, we request the largest possible blob. This is okay because // page blobs are not backed by real pages on creation, and the given size is only a the physical limit of // how large it can grow to. 
@@ -182,5 +217,10 @@ private static unsafe void WriteToBlobAsync(CloudPageBlob blob, IntPtr sourceAdd callback(0, numBytesToWrite, ovNative); }, asyncResult); } + + private string GetSegmentBlobName(int segmentId) + { + return blobName + segmentId; + } } } diff --git a/cs/src/devices/AzureStorageDevice/BlobEntry.cs b/cs/src/devices/AzureStorageDevice/BlobEntry.cs index e70d20b6d..e2ceffe18 100644 --- a/cs/src/devices/AzureStorageDevice/BlobEntry.cs +++ b/cs/src/devices/AzureStorageDevice/BlobEntry.cs @@ -21,15 +21,26 @@ class BlobEntry private ConcurrentQueue> pendingWrites; private int waitingCount; + /// + /// Creates a new BlobEntry to hold the given pageBlob. The pageBlob must already be created. + /// + /// + public BlobEntry(CloudPageBlob pageBlob) + { + this.pageBlob = pageBlob; + if (pageBlob != null) + { + pendingWrites = new ConcurrentQueue>(); + waitingCount = 0; + } + + } /// /// Creates a new BlobEntry, does not initialize a page blob. Use /// for actual creation. /// - public BlobEntry() + public BlobEntry() : this(null) { - pageBlob = null; - pendingWrites = new ConcurrentQueue>(); - waitingCount = 0; } /// diff --git a/cs/test/BasicDiskFASTERTests.cs b/cs/test/BasicDiskFASTERTests.cs index b85717bc2..c9092f23a 100644 --- a/cs/test/BasicDiskFASTERTests.cs +++ b/cs/test/BasicDiskFASTERTests.cs @@ -32,7 +32,7 @@ public void LocalStorageWriteRead() [Test] public void PageBlobWriteRead() { - if ("yes".Equals(Environment.GetEnvironmentVariable("RunAzureTests"))) + // if ("yes".Equals(Environment.GetEnvironmentVariable("RunAzureTests"))) TestDeviceWriteRead(new AzureStorageDevice(EMULATED_STORAGE_STRING, TEST_CONTAINER, "BasicDiskFASTERTests", false)); } [Test] From b2b927fbd05d4d0b262850bf9694d4522753b61e Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Tue, 23 Jul 2019 16:48:33 -0700 Subject: [PATCH 55/56] fix incorrect capacity calculation --- cs/src/core/Device/TieredStorageDevice.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cs/src/core/Device/TieredStorageDevice.cs b/cs/src/core/Device/TieredStorageDevice.cs index b1ea4af89..1b6cbcf18 100644 --- a/cs/src/core/Device/TieredStorageDevice.cs +++ b/cs/src/core/Device/TieredStorageDevice.cs @@ -150,7 +150,7 @@ private static long ComputeCapacity(IList devices) Debug.Assert(device == devices[devices.Count - 1], "Only the last tier storage of a tiered storage device can have unspecified capacity"); return Devices.CAPACITY_UNSPECIFIED; } - result += device.Capacity; + result = Math.Max(result, device.Capacity); } return result; } From 6caeac59ddb588b5f42387019a9887096f37c388 Mon Sep 17 00:00:00 2001 From: Tianyu Li Date: Wed, 24 Jul 2019 11:12:15 -0700 Subject: [PATCH 56/56] fix initialization --- cs/src/devices/AzureStorageDevice/BlobEntry.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cs/src/devices/AzureStorageDevice/BlobEntry.cs b/cs/src/devices/AzureStorageDevice/BlobEntry.cs index e2ceffe18..2ad46bcf7 100644 --- a/cs/src/devices/AzureStorageDevice/BlobEntry.cs +++ b/cs/src/devices/AzureStorageDevice/BlobEntry.cs @@ -28,8 +28,9 @@ class BlobEntry public BlobEntry(CloudPageBlob pageBlob) { this.pageBlob = pageBlob; - if (pageBlob != null) + if (pageBlob == null) { + // Only need to allocate a queue when we potentially need to asynchronously create a blob pendingWrites = new ConcurrentQueue>(); waitingCount = 0; }