Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KVStore: Fix spurious region overlap when two region are both applying snapshots #9330

Merged
merged 19 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ std::tuple<RegionPtr, PrehandleResult> MockRaftStoreProxy::snapshot(
}
catch (const Exception & e)
{
LOG_ERROR(log, "mock apply snapshot error {}", e.message());
LOG_ERROR(log, "mock apply snapshot exception {}", e.message());
e.rethrow();
}
LOG_FATAL(DB::Logger::get(), "Should not happen");
Expand Down
75 changes: 59 additions & 16 deletions dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region,
// engine may delete data unsafely.
auto region_lock = region_manager.genRegionTaskLock(old_region->id());
old_region->setStateApplying();
tmt.getRegionTable().tryWriteBlockByRegion(old_region);
tryFlushRegionCacheInStorage(tmt, *old_region, log);
// It is not worthy to call `tryWriteBlockByRegion` and `tryFlushRegionCacheInStorage` here,
// even if the written data is useful, it could be overwritten later in `onSnapshot`.
CalvinNeo marked this conversation as resolved.
Show resolved Hide resolved
// Note that we must persistRegion. This is to ensure even if a restart happens before
// the apply snapshot is finished, TiFlash can correctly reject the read index requests
persistRegion(*old_region, region_lock, PersistRegionReason::ApplySnapshotPrevRegion, "");
}
}
Expand All @@ -95,23 +97,57 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region,
if (overlapped_region.first != region_id)
{
auto state = getProxyHelper()->getRegionLocalState(overlapped_region.first);
if (state.state() != raft_serverpb::PeerState::Tombstone)
auto extra_msg = fmt::format(
"state={}, tiflash_state={}, new_region_state={}",
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved
state.ShortDebugString(),
overlapped_region.second->getMeta().getRegionState().getBase().ShortDebugString(),
new_region->getMeta().getRegionState().getBase().ShortDebugString());
if (state.state() == raft_serverpb::PeerState::Tombstone)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with region_id={}, state: {}",
LOG_INFO(
log,
"range of region_id={} is overlapped with `Tombstone` region_id={}, {}",
region_id,
overlapped_region.first,
state.ShortDebugString());
extra_msg);
handleDestroy(overlapped_region.first, tmt, task_lock);
}
else if (state.state() == raft_serverpb::PeerState::Applying)
{
// In this case, the `overlapped_region` also has a snapshot applied in raftstore,
// and is pending to be applied in TiFlash.
auto r = RegionRangeKeys::makeComparableKeys(
TiKVKey::copyFrom(state.region().start_key()),
TiKVKey::copyFrom(state.region().end_key()));

if (RegionRangeKeys::isRangeOverlapped(new_range->comparableKeys(), r))
{
// If the range is still overlapped after the snapshot, there is a hard error.
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with `Applying` region_id={}, {}",
region_id,
overlapped_region.first,
extra_msg);
}
else
{
LOG_INFO(
log,
"range of region_id={} is overlapped with `Applying` region_id={}, {}",
region_id,
overlapped_region.first,
extra_msg);
}
}
else
{
LOG_INFO(
log,
"range of region_id={} is overlapped with `Tombstone` region_id={}",
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"range of region_id={} is overlapped with region_id={}, {}",
region_id,
overlapped_region.first);
handleDestroy(overlapped_region.first, tmt, task_lock);
overlapped_region.first,
extra_msg);
}
}
}
Expand Down Expand Up @@ -161,6 +197,7 @@ void KVStore::onSnapshot(
{
RegionID region_id = new_region_wrap->id();

// 1. Try to clean stale data.
{
auto keyspace_id = new_region_wrap->getKeyspaceID();
auto table_id = new_region_wrap->getMappedTableID();
Expand Down Expand Up @@ -189,16 +226,19 @@ void KVStore::onSnapshot(
{
LOG_INFO(
log,
"clear old range before apply snapshot, region_id={} old_range={} new_range={} "
"region range changed before apply snapshot, region_id={} old_range={} new_range={} "
"keyspace={} table_id={}",
region_id,
old_key_range.toDebugString(),
new_key_range.toDebugString(),
keyspace_id,
table_id);
dm_storage->deleteRange(old_key_range, context.getSettingsRef());
// We must flush the deletion to the disk here, because we only flush new range when persisting this region later.
dm_storage->flushCache(context, old_key_range, /*try_until_succeed*/ true);
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved
/// Previously, we clean `old_key_range` here. However, we can only clean `new_key_range` here, if there is also an overlapped snapshot in region worker queue.
/// Consider:
/// 1. there exists a region A of range [0..100)
/// 2. region A splitted into A' [0..50) and B [50..100)
/// 3. snapshot of B is applied into tiflash storage first
/// 4. when applying snapshot A -> A' of range [0..50), we should not clear the old range [0, 100) but only clean the new range
}
}
if constexpr (std::is_same_v<RegionPtrWrap, RegionPtrWithSnapshotFiles>)
Expand All @@ -225,6 +265,7 @@ void KVStore::onSnapshot(
static_assert(std::is_same_v<RegionPtrWrap, RegionPtrWithBlock>);
// Call `deleteRange` to delete data for range
dm_storage->deleteRange(new_key_range, context.getSettingsRef());
// We don't flushCache here, but flush as a whole in stage 2 in `tryFlushRegionCacheInStorage`.
}
}
catch (DB::Exception & e)
Expand All @@ -236,6 +277,7 @@ void KVStore::onSnapshot(
}
}

// 2. Dump data to RegionTable.
{
const auto range = new_region_wrap->getRange();
auto & region_table = tmt.getRegionTable();
Expand All @@ -261,6 +303,7 @@ void KVStore::onSnapshot(
// For `RegionPtrWithSnapshotFiles`, don't need to flush cache.
}

// Register the new Region.
RegionPtr new_region = new_region_wrap.base;
{
auto task_lock = genTaskLock();
Expand Down
17 changes: 17 additions & 0 deletions dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ class RegionRangeKeys : boost::noncopyable
KeyspaceID getKeyspaceID() const;
std::string toDebugString() const;

static bool isRangeOverlapped(const RegionRange & a, const RegionRange & b)
{
auto start = a.first.compare(b.first);
if (start == 0)
{
return true;
}
else if (start < 0)
{
return a.second.compare(b.first) > 0;
}
else
{
return b.second.compare(a.first) > 0;
}
}

private:
RegionRange ori;
std::pair<DecodedTiKVKeyPtr, DecodedTiKVKeyPtr> raw;
Expand Down
64 changes: 59 additions & 5 deletions dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,7 @@ try
RecordKVFormat::genKey(table_id, 0),
RecordKVFormat::genKey(table_id, 10000),
kvs.getProxyHelper());
// Fill data from 20 to 100.
GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20, 100, 0);
std::vector<SSTView> sst_views{
SSTView{
Expand Down Expand Up @@ -1085,6 +1086,7 @@ try
if (ingest_using_split)
{
auto stats = storage->getStore()->getStoreStats();
// Including 0..20, 20..100, 100..inf.
ASSERT_EQ(3, stats.segment_count);
}

Expand All @@ -1097,6 +1099,7 @@ try
RecordKVFormat::genKey(table_id, 20000),
RecordKVFormat::genKey(table_id, 50000),
kvs.getProxyHelper());
// Fill data from 20100 to 20200.
GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20100, 20200, 0);
std::vector<SSTView> sst_views{
SSTView{
Expand Down Expand Up @@ -1148,10 +1151,22 @@ try
ASSERT_EQ(0, gc_n);

auto stats = storage->getStore()->getStoreStats();
ASSERT_EQ(1, stats.segment_count);
ASSERT_EQ(0, stats.total_stable_size_on_disk);
ASSERT_EQ(0, stats.total_rows);
ASSERT_EQ(0, stats.total_size);
// The data of [20, 100), is not reclaimed during Apply Snapshot.
if (ingest_using_split)
{
ASSERT_EQ(3, stats.segment_count);
ASSERT_NE(0, stats.total_stable_size_on_disk);
ASSERT_EQ(80, stats.total_rows);
ASSERT_NE(0, stats.total_size);
}
else
{
// The only segment is not reclaimed.
ASSERT_EQ(1, stats.segment_count);
ASSERT_NE(0, stats.total_stable_size_on_disk);
ASSERT_EQ(180, stats.total_rows);
ASSERT_NE(0, stats.total_size);
}
}
}
CATCH
Expand Down Expand Up @@ -1294,7 +1309,7 @@ try
}
catch (Exception & e)
{
ASSERT_EQ(e.message(), "range of region_id=20 is overlapped with region_id=22, state: region { id: 22 }");
ASSERT_TRUE(e.message().rfind("range of region_id=20 is overlapped with region_id=22", 0) == 0);
}
}

Expand Down Expand Up @@ -1708,4 +1723,43 @@ TEST_F(RegionKVStoreOldTest, RegionRange)
}
}

TEST_F(RegionKVStoreOldTest, RegionRange2)
{
auto mustOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) {
auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1));
auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2));
ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r1, r2));
ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r2, r1));
};
auto mustNotOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) {
auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1));
auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2));
ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r1, r2));
ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r2, r1));
};
mustOverlap("", "a", "", "b");
mustOverlap("a", "", "b", "");
mustOverlap("", "", "b", "");
mustOverlap("", "", "", "a");
mustOverlap("", "", "a", "b");
mustOverlap("", "", "", "");

mustOverlap("a", "e", "", "f");
mustOverlap("a", "e", "", "");
mustOverlap("b", "e", "a", "");

mustOverlap("a", "e", "a", "e");
mustOverlap("a", "f", "c", "d");
mustOverlap("a", "f", "c", "f");

mustNotOverlap("a", "e", "e", "f");
mustNotOverlap("a", "e", "f", "g");
mustNotOverlap("", "e", "e", "f");
mustNotOverlap("a", "e", "e", "");
mustNotOverlap("", "e", "f", "g");
mustNotOverlap("a", "e", "f", "");
mustNotOverlap("", "e", "f", "");
mustNotOverlap("", "e", "e", "");
}

} // namespace DB::tests
77 changes: 77 additions & 0 deletions dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,4 +1164,81 @@ try
}
CATCH

TEST_F(RegionKVStoreTest, ApplyShrinkedRegion)
try
{
auto & ctx = TiFlashTestEnv::getGlobalContext();
ASSERT_NE(proxy_helper->sst_reader_interfaces.fn_key, nullptr);
UInt64 region_id = 1;
TableID table_id;

initStorages();
KVStore & kvs = getKVS();
table_id = proxy_instance->bootstrapTable(ctx, kvs, ctx.getTMTContext());
LOG_INFO(&Poco::Logger::get("Test"), "generated table_id {}", table_id);
proxy_instance->bootstrapWithRegion(kvs, ctx.getTMTContext(), region_id, std::nullopt);
auto kvr1 = kvs.getRegion(region_id);
auto r1 = proxy_instance->getRegion(region_id);
{
// Multiple files
MockSSTReader::getMockSSTData().clear();
MockSSTGenerator default_cf{902, 800, ColumnFamilyType::Default};
default_cf.insert(1, "v1");
default_cf.finish_file();
default_cf.insert(2, "v2");
default_cf.finish_file();
default_cf.insert(3, "v3");
default_cf.insert(4, "v4");
default_cf.finish_file();
default_cf.insert(5, "v5");
default_cf.insert(6, "v6");
default_cf.finish_file();
default_cf.insert(7, "v7");
default_cf.finish_file();
default_cf.freeze();
kvs.mutProxyHelperUnsafe()->sst_reader_interfaces = make_mock_sst_reader_interface();

auto make_meta = [&]() {
auto r2 = proxy_instance->getRegion(region_id);
auto modified_meta = r2->getState().region();
modified_meta.set_id(2);
modified_meta.set_start_key(RecordKVFormat::genKey(table_id, 1));
modified_meta.set_end_key(RecordKVFormat::genKey(table_id, 4));
modified_meta.add_peers()->set_id(2);
return modified_meta;
};
auto peer_id = kvr1->getMeta().peerId();
proxy_instance->debugAddRegions(
kvs,
ctx.getTMTContext(),
{2},
{{{RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 4)}}});

// Overlap
EXPECT_THROW(
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false),
Exception);

LOG_INFO(log, "Set to applying");
CalvinNeo marked this conversation as resolved.
Show resolved Hide resolved
// region_state is "applying", but the key-range in proxy side still overlaps.
r1->mutState().set_state(raft_serverpb::PeerState::Applying);
ASSERT_EQ(proxy_helper->getRegionLocalState(1).state(), raft_serverpb::PeerState::Applying);
EXPECT_THROW(
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false),
Exception);


LOG_INFO(log, "Shrink region 1");
// region_state is "applying", but the key-range in proxy side is not overlap.
r1->mutState().set_state(raft_serverpb::PeerState::Applying);
r1->mutState().mutable_region()->set_start_key(RecordKVFormat::genKey(table_id, 0));
r1->mutState().mutable_region()->set_end_key(RecordKVFormat::genKey(table_id, 1));
proxy_instance
->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false);
}
}
CATCH

} // namespace DB::tests