Skip to content

Commit

Permalink
KVStore: Fix spurious region overlap when two region are both applyin…
Browse files Browse the repository at this point in the history
…g snapshots (#9330)

close #9329

Signed-off-by: Calvin Neo <calvinneo1995@gmail.com>

Co-authored-by: JaySon <tshent@qq.com>
  • Loading branch information
CalvinNeo and JaySon-Huang authored Aug 20, 2024
1 parent 4817c7d commit 5500b35
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 22 deletions.
2 changes: 1 addition & 1 deletion dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ std::tuple<RegionPtr, PrehandleResult> MockRaftStoreProxy::snapshot(
}
catch (const Exception & e)
{
LOG_ERROR(log, "mock apply snapshot error {}", e.message());
LOG_ERROR(log, "mock apply snapshot exception {}", e.message());
e.rethrow();
}
LOG_FATAL(DB::Logger::get(), "Should not happen");
Expand Down
75 changes: 59 additions & 16 deletions dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region,
// engine may delete data unsafely.
auto region_lock = region_manager.genRegionTaskLock(old_region->id());
old_region->setStateApplying();
tmt.getRegionTable().tryWriteBlockByRegion(old_region);
tryFlushRegionCacheInStorage(tmt, *old_region, log);
// It is not worthy to call `tryWriteBlockByRegion` and `tryFlushRegionCacheInStorage` here,
// even if the written data is useful, it could be overwritten later in `onSnapshot`.
// Note that we must persistRegion. This is to ensure even if a restart happens before
// the apply snapshot is finished, TiFlash can correctly reject the read index requests
persistRegion(*old_region, region_lock, PersistRegionReason::ApplySnapshotPrevRegion, "");
}
}
Expand All @@ -95,23 +97,57 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region,
if (overlapped_region.first != region_id)
{
auto state = getProxyHelper()->getRegionLocalState(overlapped_region.first);
if (state.state() != raft_serverpb::PeerState::Tombstone)
auto extra_msg = fmt::format(
"state={}, tiflash_state={}, new_region_state={}",
state.ShortDebugString(),
overlapped_region.second->getMeta().getRegionState().getBase().ShortDebugString(),
new_region->getMeta().getRegionState().getBase().ShortDebugString());
if (state.state() == raft_serverpb::PeerState::Tombstone)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with region_id={}, state: {}",
LOG_INFO(
log,
"range of region_id={} is overlapped with `Tombstone` region_id={}, {}",
region_id,
overlapped_region.first,
state.ShortDebugString());
extra_msg);
handleDestroy(overlapped_region.first, tmt, task_lock);
}
else if (state.state() == raft_serverpb::PeerState::Applying)
{
// In this case, the `overlapped_region` also has a snapshot applied in raftstore,
// and is pending to be applied in TiFlash.
auto r = RegionRangeKeys::makeComparableKeys(
TiKVKey::copyFrom(state.region().start_key()),
TiKVKey::copyFrom(state.region().end_key()));

if (RegionRangeKeys::isRangeOverlapped(new_range->comparableKeys(), r))
{
// If the range is still overlapped after the snapshot, there is a hard error.
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with `Applying` region_id={}, {}",
region_id,
overlapped_region.first,
extra_msg);
}
else
{
LOG_INFO(
log,
"range of region_id={} is overlapped with `Applying` region_id={}, {}",
region_id,
overlapped_region.first,
extra_msg);
}
}
else
{
LOG_INFO(
log,
"range of region_id={} is overlapped with `Tombstone` region_id={}",
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with region_id={}, {}",
region_id,
overlapped_region.first);
handleDestroy(overlapped_region.first, tmt, task_lock);
overlapped_region.first,
extra_msg);
}
}
}
Expand Down Expand Up @@ -161,6 +197,7 @@ void KVStore::onSnapshot(
{
RegionID region_id = new_region_wrap->id();

// 1. Try to clean stale data.
{
auto keyspace_id = new_region_wrap->getKeyspaceID();
auto table_id = new_region_wrap->getMappedTableID();
Expand Down Expand Up @@ -189,16 +226,19 @@ void KVStore::onSnapshot(
{
LOG_INFO(
log,
"clear old range before apply snapshot, region_id={} old_range={} new_range={} "
"region range changed before apply snapshot, region_id={} old_range={} new_range={} "
"keyspace={} table_id={}",
region_id,
old_key_range.toDebugString(),
new_key_range.toDebugString(),
keyspace_id,
table_id);
dm_storage->deleteRange(old_key_range, context.getSettingsRef());
// We must flush the deletion to the disk here, because we only flush new range when persisting this region later.
dm_storage->flushCache(context, old_key_range, /*try_until_succeed*/ true);
/// Previously, we clean `old_key_range` here. However, we can only clean `new_key_range` here, if there is also an overlapped snapshot in region worker queue.
/// Consider:
/// 1. there exists a region A of range [0..100)
/// 2. region A splitted into A' [0..50) and B [50..100)
/// 3. snapshot of B is applied into tiflash storage first
/// 4. when applying snapshot A -> A' of range [0..50), we should not clear the old range [0, 100) but only clean the new range
}
}
if constexpr (std::is_same_v<RegionPtrWrap, RegionPtrWithSnapshotFiles>)
Expand All @@ -225,6 +265,7 @@ void KVStore::onSnapshot(
static_assert(std::is_same_v<RegionPtrWrap, RegionPtrWithBlock>);
// Call `deleteRange` to delete data for range
dm_storage->deleteRange(new_key_range, context.getSettingsRef());
// We don't flushCache here, but flush as a whole in stage 2 in `tryFlushRegionCacheInStorage`.
}
}
catch (DB::Exception & e)
Expand All @@ -236,6 +277,7 @@ void KVStore::onSnapshot(
}
}

// 2. Dump data to RegionTable.
{
const auto range = new_region_wrap->getRange();
auto & region_table = tmt.getRegionTable();
Expand All @@ -261,6 +303,7 @@ void KVStore::onSnapshot(
// For `RegionPtrWithSnapshotFiles`, don't need to flush cache.
}

// Register the new Region.
RegionPtr new_region = new_region_wrap.base;
{
auto task_lock = genTaskLock();
Expand Down
17 changes: 17 additions & 0 deletions dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ class RegionRangeKeys : boost::noncopyable
KeyspaceID getKeyspaceID() const;
std::string toDebugString() const;

static bool isRangeOverlapped(const RegionRange & a, const RegionRange & b)
{
auto start = a.first.compare(b.first);
if (start == 0)
{
return true;
}
else if (start < 0)
{
return a.second.compare(b.first) > 0;
}
else
{
return b.second.compare(a.first) > 0;
}
}

private:
RegionRange ori;
std::pair<DecodedTiKVKeyPtr, DecodedTiKVKeyPtr> raw;
Expand Down
64 changes: 59 additions & 5 deletions dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,7 @@ try
RecordKVFormat::genKey(table_id, 0),
RecordKVFormat::genKey(table_id, 10000),
kvs.getProxyHelper());
// Fill data from 20 to 100.
GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20, 100, 0);
std::vector<SSTView> sst_views{
SSTView{
Expand Down Expand Up @@ -1085,6 +1086,7 @@ try
if (ingest_using_split)
{
auto stats = storage->getStore()->getStoreStats();
// Including 0..20, 20..100, 100..inf.
ASSERT_EQ(3, stats.segment_count);
}

Expand All @@ -1097,6 +1099,7 @@ try
RecordKVFormat::genKey(table_id, 20000),
RecordKVFormat::genKey(table_id, 50000),
kvs.getProxyHelper());
// Fill data from 20100 to 20200.
GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20100, 20200, 0);
std::vector<SSTView> sst_views{
SSTView{
Expand Down Expand Up @@ -1148,10 +1151,22 @@ try
ASSERT_EQ(0, gc_n);

auto stats = storage->getStore()->getStoreStats();
ASSERT_EQ(1, stats.segment_count);
ASSERT_EQ(0, stats.total_stable_size_on_disk);
ASSERT_EQ(0, stats.total_rows);
ASSERT_EQ(0, stats.total_size);
// The data of [20, 100), is not reclaimed during Apply Snapshot.
if (ingest_using_split)
{
ASSERT_EQ(3, stats.segment_count);
ASSERT_NE(0, stats.total_stable_size_on_disk);
ASSERT_EQ(80, stats.total_rows);
ASSERT_NE(0, stats.total_size);
}
else
{
// The only segment is not reclaimed.
ASSERT_EQ(1, stats.segment_count);
ASSERT_NE(0, stats.total_stable_size_on_disk);
ASSERT_EQ(180, stats.total_rows);
ASSERT_NE(0, stats.total_size);
}
}
}
CATCH
Expand Down Expand Up @@ -1294,7 +1309,7 @@ try
}
catch (Exception & e)
{
ASSERT_EQ(e.message(), "range of region_id=20 is overlapped with region_id=22, state: region { id: 22 }");
ASSERT_TRUE(e.message().rfind("range of region_id=20 is overlapped with region_id=22", 0) == 0);
}
}

Expand Down Expand Up @@ -1708,4 +1723,43 @@ TEST_F(RegionKVStoreOldTest, RegionRange)
}
}

TEST_F(RegionKVStoreOldTest, RegionRange2)
{
auto mustOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) {
auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1));
auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2));
ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r1, r2));
ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r2, r1));
};
auto mustNotOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) {
auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1));
auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2));
ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r1, r2));
ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r2, r1));
};
mustOverlap("", "a", "", "b");
mustOverlap("a", "", "b", "");
mustOverlap("", "", "b", "");
mustOverlap("", "", "", "a");
mustOverlap("", "", "a", "b");
mustOverlap("", "", "", "");

mustOverlap("a", "e", "", "f");
mustOverlap("a", "e", "", "");
mustOverlap("b", "e", "a", "");

mustOverlap("a", "e", "a", "e");
mustOverlap("a", "f", "c", "d");
mustOverlap("a", "f", "c", "f");

mustNotOverlap("a", "e", "e", "f");
mustNotOverlap("a", "e", "f", "g");
mustNotOverlap("", "e", "e", "f");
mustNotOverlap("a", "e", "e", "");
mustNotOverlap("", "e", "f", "g");
mustNotOverlap("a", "e", "f", "");
mustNotOverlap("", "e", "f", "");
mustNotOverlap("", "e", "e", "");
}

} // namespace DB::tests
77 changes: 77 additions & 0 deletions dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,4 +1164,81 @@ try
}
CATCH

TEST_F(RegionKVStoreTest, ApplyShrinkedRegion)
try
{
auto & ctx = TiFlashTestEnv::getGlobalContext();
ASSERT_NE(proxy_helper->sst_reader_interfaces.fn_key, nullptr);
UInt64 region_id = 1;
TableID table_id;

initStorages();
KVStore & kvs = getKVS();
table_id = proxy_instance->bootstrapTable(ctx, kvs, ctx.getTMTContext());
LOG_INFO(&Poco::Logger::get("Test"), "generated table_id {}", table_id);
proxy_instance->bootstrapWithRegion(kvs, ctx.getTMTContext(), region_id, std::nullopt);
auto kvr1 = kvs.getRegion(region_id);
auto r1 = proxy_instance->getRegion(region_id);
{
// Multiple files
MockSSTReader::getMockSSTData().clear();
MockSSTGenerator default_cf{902, 800, ColumnFamilyType::Default};
default_cf.insert(1, "v1");
default_cf.finish_file();
default_cf.insert(2, "v2");
default_cf.finish_file();
default_cf.insert(3, "v3");
default_cf.insert(4, "v4");
default_cf.finish_file();
default_cf.insert(5, "v5");
default_cf.insert(6, "v6");
default_cf.finish_file();
default_cf.insert(7, "v7");
default_cf.finish_file();
default_cf.freeze();
kvs.mutProxyHelperUnsafe()->sst_reader_interfaces = make_mock_sst_reader_interface();

auto make_meta = [&]() {
auto r2 = proxy_instance->getRegion(region_id);
auto modified_meta = r2->getState().region();
modified_meta.set_id(2);
modified_meta.set_start_key(RecordKVFormat::genKey(table_id, 1));
modified_meta.set_end_key(RecordKVFormat::genKey(table_id, 4));
modified_meta.add_peers()->set_id(2);
return modified_meta;
};
auto peer_id = kvr1->getMeta().peerId();
proxy_instance->debugAddRegions(
kvs,
ctx.getTMTContext(),
{2},
{{{RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 4)}}});

// Overlap
EXPECT_THROW(
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false),
Exception);

LOG_INFO(log, "Set to applying");
// region_state is "applying", but the key-range in proxy side still overlaps.
r1->mutState().set_state(raft_serverpb::PeerState::Applying);
ASSERT_EQ(proxy_helper->getRegionLocalState(1).state(), raft_serverpb::PeerState::Applying);
EXPECT_THROW(
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false),
Exception);


LOG_INFO(log, "Shrink region 1");
// region_state is "applying", but the key-range in proxy side is not overlap.
r1->mutState().set_state(raft_serverpb::PeerState::Applying);
r1->mutState().mutable_region()->set_start_key(RecordKVFormat::genKey(table_id, 0));
r1->mutState().mutable_region()->set_end_key(RecordKVFormat::genKey(table_id, 1));
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false);
}
}
CATCH

} // namespace DB::tests

0 comments on commit 5500b35

Please sign in to comment.