diff --git a/include/faabric/util/snapshot.h b/include/faabric/util/snapshot.h index bad839899..e0d195f6f 100644 --- a/include/faabric/util/snapshot.h +++ b/include/faabric/util/snapshot.h @@ -6,6 +6,8 @@ #include #include +#include + namespace faabric::util { enum SnapshotDataType @@ -36,14 +38,27 @@ struct SnapshotMergeRegion class SnapshotDiff { public: + SnapshotDataType dataType = SnapshotDataType::Raw; + SnapshotMergeOperation operation = SnapshotMergeOperation::Overwrite; uint32_t offset = 0; size_t size = 0; const uint8_t* data = nullptr; - SnapshotDataType dataType = SnapshotDataType::Raw; - SnapshotMergeOperation operation = SnapshotMergeOperation::Overwrite; SnapshotDiff() = default; + SnapshotDiff(SnapshotDataType dataTypeIn, + SnapshotMergeOperation operationIn, + uint32_t offsetIn, + const uint8_t* dataIn, + size_t sizeIn) + { + dataType = dataTypeIn; + operation = operationIn; + offset = offsetIn; + data = dataIn; + size = sizeIn; + } + SnapshotDiff(uint32_t offsetIn, const uint8_t* dataIn, size_t sizeIn) { offset = offsetIn; @@ -76,4 +91,8 @@ class SnapshotData // order of offsets std::map mergeRegions; }; + +std::string snapshotDataTypeStr(SnapshotDataType dt); + +std::string snapshotMergeOpStr(SnapshotMergeOperation op); } diff --git a/src/util/snapshot.cpp b/src/util/snapshot.cpp index c39dfb129..f49e99a55 100644 --- a/src/util/snapshot.cpp +++ b/src/util/snapshot.cpp @@ -42,15 +42,32 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, std::vector dirtyPageNumbers = getDirtyPageNumbers(updated, nThisPages); + SPDLOG_TRACE("Diffing {} pages with {} changed pages and {} merge regions", + nThisPages, + dirtyPageNumbers.size(), + mergeRegions.size()); + + for (auto& m : mergeRegions) { + SPDLOG_TRACE("{} {} merge region at {} {}-{}", + snapshotDataTypeStr(m.second.dataType), + snapshotMergeOpStr(m.second.operation), + m.first, + m.second.offset, + m.second.offset + m.second.length); + } + // Get iterator over merge regions std::map::iterator mergeIt = mergeRegions.begin(); // Get byte-wise diffs _within_ the dirty pages // - // NOTE - this will cause diffs to be split across pages if they hit a page - // boundary, but we can be relatively confident that variables will be - // page-aligned so this shouldn't be a problem + // NOTE - if raw diffs cover page boundaries, they will be split into + // multiple diffs, each of which is page-aligned. + // We can be relatively confident that variables will be page-aligned so + // this shouldn't be a problem. + // + // Merge regions support crossing page boundaries. // // For each byte we encounter have the following possible scenarios: // @@ -75,12 +92,44 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, offset = pageOffset + b; bool isDirtyByte = *(data + offset) != *(updated + offset); + // Skip any merge regions we've passed + while (mergeIt != mergeRegions.end() && + offset >= + (mergeIt->second.offset + mergeIt->second.length)) { + SnapshotMergeRegion region = mergeIt->second; + SPDLOG_TRACE("At offset {}, past region {} {} {}-{}", + offset, + snapshotDataTypeStr(region.dataType), + snapshotMergeOpStr(region.operation), + region.offset, + region.offset + region.length); + + ++mergeIt; + } + + // Check if we're in a merge region bool isInMergeRegion = mergeIt != mergeRegions.end() && offset >= mergeIt->second.offset && offset < (mergeIt->second.offset + mergeIt->second.length); if (isDirtyByte && isInMergeRegion) { + // If we've entered a merge region with a diff in progress, we + // need to close it off + if (diffInProgress) { + diffs.emplace_back( + diffStart, updated + diffStart, offset - diffStart); + + SPDLOG_TRACE( + "Finished {} {} diff between {}-{} before merge region", + snapshotDataTypeStr(diffs.back().dataType), + snapshotMergeOpStr(diffs.back().operation), + diffs.back().offset, + diffs.back().offset + diffs.back().size); + + diffInProgress = false; + } + SnapshotMergeRegion region = mergeIt->second; // Set up the diff @@ -91,8 +140,6 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, diff.dataType = region.dataType; diff.operation = region.operation; - bool isIgnore = false; - // Modify diff data for certain operations switch (region.dataType) { case (SnapshotDataType::Int): { @@ -144,7 +191,10 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, case (SnapshotDataType::Raw): { switch (region.operation) { case (SnapshotMergeOperation::Ignore): { - isIgnore = true; + break; + } + case (SnapshotMergeOperation::Overwrite): { + // Default behaviour break; } default: { @@ -155,6 +205,7 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, "Unhandled raw merge operation"); } } + break; } default: { @@ -165,35 +216,83 @@ std::vector SnapshotData::getChangeDiffs(const uint8_t* updated, } } + SPDLOG_TRACE("Diff at {} falls in {} {} merge region {}-{}", + pageOffset + b, + snapshotDataTypeStr(region.dataType), + snapshotMergeOpStr(region.operation), + region.offset, + region.offset + region.length); + // Add the diff to the list - if (!isIgnore) { + if (diff.operation != SnapshotMergeOperation::Ignore) { diffs.emplace_back(diff); } - // Bump the loop variable to the end of this region (note that - // the loop itself will increment onto the next) - b = (region.offset - pageOffset) + (region.length - 1); + // Work out the offset where this region ends + int regionEndOffset = + (region.offset - pageOffset) + region.length; - // Move onto the next merge region - ++mergeIt; + if (regionEndOffset < HOST_PAGE_SIZE) { + // Skip over this region, still more offsets left in this + // page + SPDLOG_TRACE( + "{} {} merge region {}-{} finished. Skipping to {}", + snapshotDataTypeStr(region.dataType), + snapshotMergeOpStr(region.operation), + region.offset, + region.offset + region.length, + pageOffset + regionEndOffset); + + // Bump the loop variable to the end of this region (note + // that the loop itself will increment onto the next). + b = regionEndOffset - 1; + } else { + // Merge region extends over this page, move onto next + SPDLOG_TRACE( + "{} {} merge region {}-{} over page boundary {} ({}-{})", + snapshotDataTypeStr(region.dataType), + snapshotMergeOpStr(region.operation), + region.offset, + region.offset + region.length, + i, + pageOffset, + pageOffset + HOST_PAGE_SIZE); + + break; + } } else if (isDirtyByte && !diffInProgress) { // Diff starts here if it's different and diff not in progress diffInProgress = true; diffStart = offset; + + SPDLOG_TRACE("Started Raw Overwrite diff at {}", diffStart); } else if (!isDirtyByte && diffInProgress) { // Diff ends if it's not different and diff is in progress diffInProgress = false; diffs.emplace_back( diffStart, updated + diffStart, offset - diffStart); + + SPDLOG_TRACE("Finished {} {} diff between {}-{}", + snapshotDataTypeStr(diffs.back().dataType), + snapshotMergeOpStr(diffs.back().operation), + diffs.back().offset, + diffs.back().offset + diffs.back().size); } } - // If we've reached the end with a diff in progress, we need to close it - // off + // If we've reached the end of this page with a diff in progress, we + // need to close it off if (diffInProgress) { offset++; + diffs.emplace_back( diffStart, updated + diffStart, offset - diffStart); + + SPDLOG_TRACE("Found {} {} diff between {}-{} at end of page", + snapshotDataTypeStr(diffs.back().dataType), + snapshotMergeOpStr(diffs.back().operation), + diffs.back().offset, + diffs.back().offset + diffs.back().size); } } @@ -219,4 +318,51 @@ void SnapshotData::addMergeRegion(uint32_t offset, faabric::util::UniqueLock lock(snapMx); mergeRegions[offset] = region; } + +std::string snapshotDataTypeStr(SnapshotDataType dt) +{ + switch (dt) { + case (SnapshotDataType::Raw): { + return "Raw"; + } + case (SnapshotDataType::Int): { + return "Int"; + } + default: { + SPDLOG_ERROR("Cannot convert snapshot data type to string: {}", dt); + throw std::runtime_error("Cannot convert data type to string"); + } + } +} + +std::string snapshotMergeOpStr(SnapshotMergeOperation op) +{ + switch (op) { + case (SnapshotMergeOperation::Ignore): { + return "Ignore"; + } + case (SnapshotMergeOperation::Max): { + return "Max"; + } + case (SnapshotMergeOperation::Min): { + return "Min"; + } + case (SnapshotMergeOperation::Overwrite): { + return "Overwrite"; + } + case (SnapshotMergeOperation::Product): { + return "Product"; + } + case (SnapshotMergeOperation::Subtract): { + return "Subtract"; + } + case (SnapshotMergeOperation::Sum): { + return "Sum"; + } + default: { + SPDLOG_ERROR("Cannot convert snapshot merge op to string: {}", op); + throw std::runtime_error("Cannot convert merge op to string"); + } + } +} } diff --git a/tests/test/util/test_snapshot.cpp b/tests/test/util/test_snapshot.cpp index db8b88089..4ae49966c 100644 --- a/tests/test/util/test_snapshot.cpp +++ b/tests/test/util/test_snapshot.cpp @@ -12,7 +12,59 @@ using namespace faabric::util; namespace tests { -TEST_CASE_METHOD(SnapshotTestFixture, +class SnapshotMergeTestFixture : public SnapshotTestFixture +{ + public: + SnapshotMergeTestFixture() {} + ~SnapshotMergeTestFixture() {} + + protected: + std::string snapKey; + int snapPages; + faabric::util::SnapshotData snap; + + uint8_t* setUpSnapshot(int snapPages) + { + snapKey = "foobar123"; + snap.size = snapPages * faabric::util::HOST_PAGE_SIZE; + snap.data = allocatePages(snapPages); + + // Take the snapshot + reg.takeSnapshot(snapKey, snap, true); + + // Map the snapshot + uint8_t* sharedMem = allocatePages(snapPages); + reg.mapSnapshot(snapKey, sharedMem); + + // Reset dirty tracking + faabric::util::resetDirtyTracking(); + + return sharedMem; + } + + void checkDiffs(std::vector& actualDiffs, + std::vector& expectedDiffs) + { + REQUIRE(actualDiffs.size() == expectedDiffs.size()); + + for (int i = 0; i < actualDiffs.size(); i++) { + SnapshotDiff actualDiff = actualDiffs.at(i); + SnapshotDiff expectedDiff = expectedDiffs.at(i); + + REQUIRE(actualDiff.operation == expectedDiff.operation); + REQUIRE(actualDiff.dataType == expectedDiff.dataType); + REQUIRE(actualDiff.offset == expectedDiff.offset); + + std::vector actualData(actualDiff.data, + actualDiff.data + actualDiff.size); + std::vector expectedData( + expectedDiff.data, expectedDiff.data + expectedDiff.size); + REQUIRE(actualData == expectedData); + } + } +}; + +TEST_CASE_METHOD(SnapshotMergeTestFixture, "Detailed test snapshot merge regions with ints", "[util]") { @@ -123,7 +175,7 @@ TEST_CASE_METHOD(SnapshotTestFixture, deallocatePages(snap.data, snapPages); } -TEST_CASE_METHOD(SnapshotTestFixture, +TEST_CASE_METHOD(SnapshotMergeTestFixture, "Test edge-cases of snapshot merge regions", "[util]") { @@ -139,22 +191,22 @@ TEST_CASE_METHOD(SnapshotTestFixture, int originalA = 50; int finalA = 25; int subA = 25; - int offsetA = 0; + uint32_t offsetA = 0; int originalB = 100; int finalB = 200; int sumB = 100; - int offsetB = HOST_PAGE_SIZE + (2 * sizeof(int32_t)); + uint32_t offsetB = HOST_PAGE_SIZE + (2 * sizeof(int32_t)); int originalC = 200; int finalC = 150; int subC = 50; - int offsetC = offsetB + sizeof(int32_t); + uint32_t offsetC = offsetB + sizeof(int32_t); int originalD = 100; int finalD = 150; int sumD = 50; - int offsetD = snapSize - sizeof(int32_t); + uint32_t offsetD = snapSize - sizeof(int32_t); faabric::util::SnapshotData snap; snap.size = snapSize; @@ -202,37 +254,44 @@ TEST_CASE_METHOD(SnapshotTestFixture, *(int*)(sharedMem + offsetD) = finalD; // Check the diffs + std::vector expectedDiffs = { + { SnapshotDataType::Int, + SnapshotMergeOperation::Subtract, + offsetA, + BYTES(&subA), + sizeof(int32_t) }, + { SnapshotDataType::Int, + SnapshotMergeOperation::Sum, + offsetB, + BYTES(&sumB), + sizeof(int32_t) }, + { SnapshotDataType::Int, + SnapshotMergeOperation::Subtract, + offsetC, + BYTES(&subC), + sizeof(int32_t) }, + { SnapshotDataType::Int, + SnapshotMergeOperation::Sum, + offsetD, + BYTES(&sumD), + sizeof(int32_t) }, + }; + std::vector actualDiffs = snap.getChangeDiffs(sharedMem, sharedMemSize); REQUIRE(actualDiffs.size() == 4); - SnapshotDiff diffA = actualDiffs.at(0); - SnapshotDiff diffB = actualDiffs.at(1); - SnapshotDiff diffC = actualDiffs.at(2); - SnapshotDiff diffD = actualDiffs.at(3); - - REQUIRE(diffA.offset == offsetA); - REQUIRE(diffB.offset == offsetB); - REQUIRE(diffC.offset == offsetC); - REQUIRE(diffD.offset == offsetD); - - REQUIRE(diffA.operation == SnapshotMergeOperation::Subtract); - REQUIRE(diffB.operation == SnapshotMergeOperation::Sum); - REQUIRE(diffC.operation == SnapshotMergeOperation::Subtract); - REQUIRE(diffD.operation == SnapshotMergeOperation::Sum); - - REQUIRE(*(int*)diffA.data == subA); - REQUIRE(*(int*)diffB.data == sumB); - REQUIRE(*(int*)diffC.data == subC); - REQUIRE(*(int*)diffD.data == sumD); + checkDiffs(actualDiffs, expectedDiffs); } -TEST_CASE_METHOD(SnapshotTestFixture, "Test snapshot merge regions", "[util]") +TEST_CASE_METHOD(SnapshotMergeTestFixture, + "Test snapshot merge regions", + "[util]") { std::string snapKey = "foobar123"; int snapPages = 5; - int offset = HOST_PAGE_SIZE + (10 * sizeof(int32_t)); + uint32_t offset = HOST_PAGE_SIZE + (10 * sizeof(int32_t)); faabric::util::SnapshotData snap; snap.size = snapPages * faabric::util::HOST_PAGE_SIZE; @@ -317,10 +376,12 @@ TEST_CASE_METHOD(SnapshotTestFixture, "Test snapshot merge regions", "[util]") expectedData = originalData; dataType = faabric::util::SnapshotDataType::Raw; - operation = faabric::util::SnapshotMergeOperation::Ignore; + operation = faabric::util::SnapshotMergeOperation::Overwrite; SECTION("Ignore") { + operation = faabric::util::SnapshotMergeOperation::Ignore; + // Scatter some modifications through the updated data, to make sure // none are picked up updatedData[0] = 1; @@ -360,21 +421,21 @@ TEST_CASE_METHOD(SnapshotTestFixture, "Test snapshot merge regions", "[util]") REQUIRE(actualDiffs.size() == expectedNumDiffs); if (expectedNumDiffs == 1) { - SnapshotDiff diff = actualDiffs.at(0); - REQUIRE(diff.offset == offset); - REQUIRE(diff.operation == operation); - REQUIRE(diff.dataType == dataType); - REQUIRE(diff.size == dataLength); - - // Check actual and expected - std::vector actualData(diff.data, diff.data + dataLength); - REQUIRE(actualData == expectedData); + std::vector expectedDiffs = { { dataType, + operation, + offset, + expectedData.data(), + expectedData.size() } }; + + checkDiffs(actualDiffs, expectedDiffs); } deallocatePages(snap.data, snapPages); } -TEST_CASE_METHOD(SnapshotTestFixture, "Test invalid snapshot merges", "[util]") +TEST_CASE_METHOD(SnapshotMergeTestFixture, + "Test invalid snapshot merges", + "[util]") { std::string snapKey = "foobar123"; int snapPages = 3; @@ -395,6 +456,7 @@ TEST_CASE_METHOD(SnapshotTestFixture, "Test invalid snapshot merges", "[util]") SECTION("Integer overwrite") { dataType = faabric::util::SnapshotDataType::Int; + operation = faabric::util::SnapshotMergeOperation::Overwrite; dataLength = sizeof(int32_t); expectedMsg = "Unhandled integer merge operation"; } @@ -402,6 +464,7 @@ TEST_CASE_METHOD(SnapshotTestFixture, "Test invalid snapshot merges", "[util]") SECTION("Raw sum") { dataType = faabric::util::SnapshotDataType::Raw; + operation = faabric::util::SnapshotMergeOperation::Sum; dataLength = 123; expectedMsg = "Unhandled raw merge operation"; } @@ -436,4 +499,181 @@ TEST_CASE_METHOD(SnapshotTestFixture, "Test invalid snapshot merges", "[util]") REQUIRE(failed); deallocatePages(snap.data, snapPages); } + +TEST_CASE_METHOD(SnapshotMergeTestFixture, "Test cross-page ignores", "[util]") +{ + int snapPages = 6; + size_t sharedMemSize = snapPages * HOST_PAGE_SIZE; + uint8_t* sharedMem = setUpSnapshot(snapPages); + + // Add ignore regions that cover multiple pages + int ignoreOffsetA = HOST_PAGE_SIZE + 100; + int ignoreLengthA = 2 * HOST_PAGE_SIZE; + snap.addMergeRegion(ignoreOffsetA, + ignoreLengthA, + faabric::util::SnapshotDataType::Raw, + faabric::util::SnapshotMergeOperation::Ignore); + + int ignoreOffsetB = sharedMemSize - (HOST_PAGE_SIZE + 10); + int ignoreLengthB = 30; + snap.addMergeRegion(ignoreOffsetB, + ignoreLengthB, + faabric::util::SnapshotDataType::Raw, + faabric::util::SnapshotMergeOperation::Ignore); + + // Add some modifications that will cause diffs, and some should be ignored + std::vector dataA(10, 1); + std::vector dataB(1, 1); + std::vector dataC(1, 1); + std::vector dataD(3, 1); + + // Not ignored, start of memory + uint32_t offsetA = 0; + std::memcpy(sharedMem + offsetA, dataA.data(), dataA.size()); + + // Not ignored, just before first ignore region + uint32_t offsetB = ignoreOffsetA - 1; + std::memcpy(sharedMem + offsetB, dataB.data(), dataB.size()); + + // Not ignored, just after first ignore region + uint32_t offsetC = ignoreOffsetA + ignoreLengthA; + std::memcpy(sharedMem + offsetC, dataC.data(), dataC.size()); + + // Not ignored, just before second ignore region + uint32_t offsetD = ignoreOffsetB - 4; + std::memcpy(sharedMem + offsetD, dataD.data(), dataD.size()); + + // Ignored, start of first ignore region + sharedMem[ignoreOffsetA] = (uint8_t)1; + + // Ignored, just before page boundary in ignore region + sharedMem[(2 * HOST_PAGE_SIZE) - 1] = (uint8_t)1; + + // Ignored, just after page boundary in ignore region + sharedMem[(2 * HOST_PAGE_SIZE)] = (uint8_t)1; + + // Deliberately don't put any changes after the next page boundary to check + // that it rolls over properly + + // Ignored, just inside second region + sharedMem[ignoreOffsetB + 2] = (uint8_t)1; + + // Ignored, end of second region + sharedMem[ignoreOffsetB + ignoreLengthB - 1] = (uint8_t)1; + + std::vector expectedDiffs = { + { offsetA, dataA.data(), dataA.size() }, + { offsetB, dataB.data(), dataB.size() }, + { offsetC, dataC.data(), dataC.size() }, + { offsetD, dataD.data(), dataD.size() }, + }; + + // Check number of diffs + std::vector actualDiffs = + snap.getChangeDiffs(sharedMem, sharedMemSize); + + checkDiffs(actualDiffs, expectedDiffs); +} + +TEST_CASE_METHOD(SnapshotMergeTestFixture, + "Test fine-grained byte-wise diffs", + "[util]") +{ + int snapPages = 3; + size_t sharedMemSize = snapPages * HOST_PAGE_SIZE; + uint8_t* sharedMem = setUpSnapshot(snapPages); + + // Add some tightly-packed changes + uint32_t offsetA = 0; + std::vector dataA(10, 1); + std::memcpy(sharedMem + offsetA, dataA.data(), dataA.size()); + + uint32_t offsetB = dataA.size() + 1; + std::vector dataB(2, 1); + std::memcpy(sharedMem + offsetB, dataB.data(), dataB.size()); + + uint32_t offsetC = offsetB + 3; + std::vector dataC(1, 1); + std::memcpy(sharedMem + offsetC, dataC.data(), dataC.size()); + + uint32_t offsetD = offsetC + 2; + std::vector dataD(1, 1); + std::memcpy(sharedMem + offsetD, dataD.data(), dataD.size()); + + std::vector expectedDiffs = { + { offsetA, dataA.data(), dataA.size() }, + { offsetB, dataB.data(), dataB.size() }, + { offsetC, dataC.data(), dataC.size() }, + { offsetD, dataD.data(), dataD.size() }, + }; + + // Check number of diffs + std::vector actualDiffs = + snap.getChangeDiffs(sharedMem, sharedMemSize); + + checkDiffs(actualDiffs, expectedDiffs); +} + +TEST_CASE_METHOD(SnapshotMergeTestFixture, + "Test mix of applicable and non-applicable merge regions", + "[util]") +{ + int snapPages = 6; + size_t sharedMemSize = snapPages * HOST_PAGE_SIZE; + uint8_t* sharedMem = setUpSnapshot(snapPages); + + // Add a couple of merge regions on each page, which should be skipped as + // they won't cover any changes + for (int i = 0; i < snapPages; i++) { + // Ignore + int iOff = i * HOST_PAGE_SIZE; + snap.addMergeRegion(iOff, + 10, + faabric::util::SnapshotDataType::Raw, + faabric::util::SnapshotMergeOperation::Ignore); + + // Sum + int sOff = ((i + 1) * HOST_PAGE_SIZE) - (2 * sizeof(int32_t)); + snap.addMergeRegion(sOff, + sizeof(int32_t), + faabric::util::SnapshotDataType::Int, + faabric::util::SnapshotMergeOperation::Sum); + } + + // Add an ignore region that should take effect, along with a corresponding + // change to be ignored + uint32_t ignoreA = (2 * HOST_PAGE_SIZE) + 2; + snap.addMergeRegion(ignoreA, + 20, + faabric::util::SnapshotDataType::Raw, + faabric::util::SnapshotMergeOperation::Ignore); + std::vector dataA(10, 1); + std::memcpy(sharedMem + ignoreA, dataA.data(), dataA.size()); + + // Add a sum region and data that should also take effect + uint32_t sumOffset = (4 * HOST_PAGE_SIZE) + 100; + int sumValue = 333; + int sumOriginal = 111; + int sumExpected = 222; + snap.addMergeRegion(sumOffset, + sizeof(int32_t), + faabric::util::SnapshotDataType::Int, + faabric::util::SnapshotMergeOperation::Sum); + *(int*)(snap.data + sumOffset) = sumOriginal; + *(int*)(sharedMem + sumOffset) = sumValue; + + // Check diffs + std::vector expectedDiffs = { + { faabric::util::SnapshotDataType::Int, + faabric::util::SnapshotMergeOperation::Sum, + sumOffset, + BYTES(&sumExpected), + sizeof(int32_t) }, + }; + + std::vector actualDiffs = + snap.getChangeDiffs(sharedMem, sharedMemSize); + + checkDiffs(actualDiffs, expectedDiffs); +} }