diff --git a/dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp b/dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp index 907e3dd79dc..4a9e54e295d 100644 --- a/dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp +++ b/dbms/src/Debug/MockKVStore/MockRaftStoreProxy.cpp @@ -674,7 +674,7 @@ std::tuple MockRaftStoreProxy::snapshot( } catch (const Exception & e) { - LOG_ERROR(log, "mock apply snapshot error {}", e.message()); + LOG_ERROR(log, "mock apply snapshot exception {}", e.message()); e.rethrow(); } LOG_FATAL(DB::Logger::get(), "Should not happen"); diff --git a/dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp b/dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp index 0f1bc76b438..2055fb335e2 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp +++ b/dbms/src/Storages/KVStore/MultiRaft/ApplySnapshot.cpp @@ -80,8 +80,10 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region, // engine may delete data unsafely. auto region_lock = region_manager.genRegionTaskLock(old_region->id()); old_region->setStateApplying(); - tmt.getRegionTable().tryWriteBlockByRegion(old_region); - tryFlushRegionCacheInStorage(tmt, *old_region, log); + // It is not worthy to call `tryWriteBlockByRegion` and `tryFlushRegionCacheInStorage` here, + // even if the written data is useful, it could be overwritten later in `onSnapshot`. + // Note that we must persistRegion. This is to ensure even if a restart happens before + // the apply snapshot is finished, TiFlash can correctly reject the read index requests persistRegion(*old_region, region_lock, PersistRegionReason::ApplySnapshotPrevRegion, ""); } } @@ -95,23 +97,57 @@ void KVStore::checkAndApplyPreHandledSnapshot(const RegionPtrWrap & new_region, if (overlapped_region.first != region_id) { auto state = getProxyHelper()->getRegionLocalState(overlapped_region.first); - if (state.state() != raft_serverpb::PeerState::Tombstone) + auto extra_msg = fmt::format( + "state={}, tiflash_state={}, new_region_state={}", + state.ShortDebugString(), + overlapped_region.second->getMeta().getRegionState().getBase().ShortDebugString(), + new_region->getMeta().getRegionState().getBase().ShortDebugString()); + if (state.state() == raft_serverpb::PeerState::Tombstone) { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "range of region_id={} is overlapped with region_id={}, state: {}", + LOG_INFO( + log, + "range of region_id={} is overlapped with `Tombstone` region_id={}, {}", region_id, overlapped_region.first, - state.ShortDebugString()); + extra_msg); + handleDestroy(overlapped_region.first, tmt, task_lock); + } + else if (state.state() == raft_serverpb::PeerState::Applying) + { + // In this case, the `overlapped_region` also has a snapshot applied in raftstore, + // and is pending to be applied in TiFlash. + auto r = RegionRangeKeys::makeComparableKeys( + TiKVKey::copyFrom(state.region().start_key()), + TiKVKey::copyFrom(state.region().end_key())); + + if (RegionRangeKeys::isRangeOverlapped(new_range->comparableKeys(), r)) + { + // If the range is still overlapped after the snapshot, there is a hard error. + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "range of region_id={} is overlapped with `Applying` region_id={}, {}", + region_id, + overlapped_region.first, + extra_msg); + } + else + { + LOG_INFO( + log, + "range of region_id={} is overlapped with `Applying` region_id={}, {}", + region_id, + overlapped_region.first, + extra_msg); + } } else { - LOG_INFO( - log, - "range of region_id={} is overlapped with `Tombstone` region_id={}", + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "range of region_id={} is overlapped with region_id={}, {}", region_id, - overlapped_region.first); - handleDestroy(overlapped_region.first, tmt, task_lock); + overlapped_region.first, + extra_msg); } } } @@ -161,6 +197,7 @@ void KVStore::onSnapshot( { RegionID region_id = new_region_wrap->id(); + // 1. Try to clean stale data. { auto keyspace_id = new_region_wrap->getKeyspaceID(); auto table_id = new_region_wrap->getMappedTableID(); @@ -189,16 +226,19 @@ void KVStore::onSnapshot( { LOG_INFO( log, - "clear old range before apply snapshot, region_id={} old_range={} new_range={} " + "region range changed before apply snapshot, region_id={} old_range={} new_range={} " "keyspace={} table_id={}", region_id, old_key_range.toDebugString(), new_key_range.toDebugString(), keyspace_id, table_id); - dm_storage->deleteRange(old_key_range, context.getSettingsRef()); - // We must flush the deletion to the disk here, because we only flush new range when persisting this region later. - dm_storage->flushCache(context, old_key_range, /*try_until_succeed*/ true); + /// Previously, we clean `old_key_range` here. However, we can only clean `new_key_range` here, if there is also an overlapped snapshot in region worker queue. + /// Consider: + /// 1. there exists a region A of range [0..100) + /// 2. region A splitted into A' [0..50) and B [50..100) + /// 3. snapshot of B is applied into tiflash storage first + /// 4. when applying snapshot A -> A' of range [0..50), we should not clear the old range [0, 100) but only clean the new range } } if constexpr (std::is_same_v) @@ -225,6 +265,7 @@ void KVStore::onSnapshot( static_assert(std::is_same_v); // Call `deleteRange` to delete data for range dm_storage->deleteRange(new_key_range, context.getSettingsRef()); + // We don't flushCache here, but flush as a whole in stage 2 in `tryFlushRegionCacheInStorage`. } } catch (DB::Exception & e) @@ -236,6 +277,7 @@ void KVStore::onSnapshot( } } + // 2. Dump data to RegionTable. { const auto range = new_region_wrap->getRange(); auto & region_table = tmt.getRegionTable(); @@ -261,6 +303,7 @@ void KVStore::onSnapshot( // For `RegionPtrWithSnapshotFiles`, don't need to flush cache. } + // Register the new Region. RegionPtr new_region = new_region_wrap.base; { auto task_lock = genTaskLock(); diff --git a/dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h b/dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h index 4fbea196227..31e3a0f95c9 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h +++ b/dbms/src/Storages/KVStore/MultiRaft/RegionRangeKeys.h @@ -64,6 +64,23 @@ class RegionRangeKeys : boost::noncopyable KeyspaceID getKeyspaceID() const; std::string toDebugString() const; + static bool isRangeOverlapped(const RegionRange & a, const RegionRange & b) + { + auto start = a.first.compare(b.first); + if (start == 0) + { + return true; + } + else if (start < 0) + { + return a.second.compare(b.first) > 0; + } + else + { + return b.second.compare(a.first) > 0; + } + } + private: RegionRange ori; std::pair raw; diff --git a/dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp b/dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp index 6387b8256c6..7a70e673fad 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_kvstore.cpp @@ -1057,6 +1057,7 @@ try RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 10000), kvs.getProxyHelper()); + // Fill data from 20 to 100. GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20, 100, 0); std::vector sst_views{ SSTView{ @@ -1085,6 +1086,7 @@ try if (ingest_using_split) { auto stats = storage->getStore()->getStoreStats(); + // Including 0..20, 20..100, 100..inf. ASSERT_EQ(3, stats.segment_count); } @@ -1097,6 +1099,7 @@ try RecordKVFormat::genKey(table_id, 20000), RecordKVFormat::genKey(table_id, 50000), kvs.getProxyHelper()); + // Fill data from 20100 to 20200. GenMockSSTData(DMTestEnv::getMinimalTableInfo(table_id), table_id, region_id_str, 20100, 20200, 0); std::vector sst_views{ SSTView{ @@ -1148,10 +1151,22 @@ try ASSERT_EQ(0, gc_n); auto stats = storage->getStore()->getStoreStats(); - ASSERT_EQ(1, stats.segment_count); - ASSERT_EQ(0, stats.total_stable_size_on_disk); - ASSERT_EQ(0, stats.total_rows); - ASSERT_EQ(0, stats.total_size); + // The data of [20, 100), is not reclaimed during Apply Snapshot. + if (ingest_using_split) + { + ASSERT_EQ(3, stats.segment_count); + ASSERT_NE(0, stats.total_stable_size_on_disk); + ASSERT_EQ(80, stats.total_rows); + ASSERT_NE(0, stats.total_size); + } + else + { + // The only segment is not reclaimed. + ASSERT_EQ(1, stats.segment_count); + ASSERT_NE(0, stats.total_stable_size_on_disk); + ASSERT_EQ(180, stats.total_rows); + ASSERT_NE(0, stats.total_size); + } } } CATCH @@ -1294,7 +1309,7 @@ try } catch (Exception & e) { - ASSERT_EQ(e.message(), "range of region_id=20 is overlapped with region_id=22, state: region { id: 22 }"); + ASSERT_TRUE(e.message().rfind("range of region_id=20 is overlapped with region_id=22", 0) == 0); } } @@ -1708,4 +1723,43 @@ TEST_F(RegionKVStoreOldTest, RegionRange) } } +TEST_F(RegionKVStoreOldTest, RegionRange2) +{ + auto mustOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) { + auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1)); + auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2)); + ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r1, r2)); + ASSERT_TRUE(RegionRangeKeys::isRangeOverlapped(r2, r1)); + }; + auto mustNotOverlap = [](std::string s1, std::string e1, std::string s2, std::string e2) { + auto r1 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s1), TiKVKey::copyFrom(e1)); + auto r2 = RegionRangeKeys::makeComparableKeys(TiKVKey::copyFrom(s2), TiKVKey::copyFrom(e2)); + ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r1, r2)); + ASSERT_FALSE(RegionRangeKeys::isRangeOverlapped(r2, r1)); + }; + mustOverlap("", "a", "", "b"); + mustOverlap("a", "", "b", ""); + mustOverlap("", "", "b", ""); + mustOverlap("", "", "", "a"); + mustOverlap("", "", "a", "b"); + mustOverlap("", "", "", ""); + + mustOverlap("a", "e", "", "f"); + mustOverlap("a", "e", "", ""); + mustOverlap("b", "e", "a", ""); + + mustOverlap("a", "e", "a", "e"); + mustOverlap("a", "f", "c", "d"); + mustOverlap("a", "f", "c", "f"); + + mustNotOverlap("a", "e", "e", "f"); + mustNotOverlap("a", "e", "f", "g"); + mustNotOverlap("", "e", "e", "f"); + mustNotOverlap("a", "e", "e", ""); + mustNotOverlap("", "e", "f", "g"); + mustNotOverlap("a", "e", "f", ""); + mustNotOverlap("", "e", "f", ""); + mustNotOverlap("", "e", "e", ""); +} + } // namespace DB::tests diff --git a/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp b/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp index e3ddb37da1b..8eb834110b1 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp @@ -1164,4 +1164,81 @@ try } CATCH +TEST_F(RegionKVStoreTest, ApplyShrinkedRegion) +try +{ + auto & ctx = TiFlashTestEnv::getGlobalContext(); + ASSERT_NE(proxy_helper->sst_reader_interfaces.fn_key, nullptr); + UInt64 region_id = 1; + TableID table_id; + + initStorages(); + KVStore & kvs = getKVS(); + table_id = proxy_instance->bootstrapTable(ctx, kvs, ctx.getTMTContext()); + LOG_INFO(&Poco::Logger::get("Test"), "generated table_id {}", table_id); + proxy_instance->bootstrapWithRegion(kvs, ctx.getTMTContext(), region_id, std::nullopt); + auto kvr1 = kvs.getRegion(region_id); + auto r1 = proxy_instance->getRegion(region_id); + { + // Multiple files + MockSSTReader::getMockSSTData().clear(); + MockSSTGenerator default_cf{902, 800, ColumnFamilyType::Default}; + default_cf.insert(1, "v1"); + default_cf.finish_file(); + default_cf.insert(2, "v2"); + default_cf.finish_file(); + default_cf.insert(3, "v3"); + default_cf.insert(4, "v4"); + default_cf.finish_file(); + default_cf.insert(5, "v5"); + default_cf.insert(6, "v6"); + default_cf.finish_file(); + default_cf.insert(7, "v7"); + default_cf.finish_file(); + default_cf.freeze(); + kvs.mutProxyHelperUnsafe()->sst_reader_interfaces = make_mock_sst_reader_interface(); + + auto make_meta = [&]() { + auto r2 = proxy_instance->getRegion(region_id); + auto modified_meta = r2->getState().region(); + modified_meta.set_id(2); + modified_meta.set_start_key(RecordKVFormat::genKey(table_id, 1)); + modified_meta.set_end_key(RecordKVFormat::genKey(table_id, 4)); + modified_meta.add_peers()->set_id(2); + return modified_meta; + }; + auto peer_id = kvr1->getMeta().peerId(); + proxy_instance->debugAddRegions( + kvs, + ctx.getTMTContext(), + {2}, + {{{RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 4)}}}); + + // Overlap + EXPECT_THROW( + proxy_instance + ->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false), + Exception); + + LOG_INFO(log, "Set to applying"); + // region_state is "applying", but the key-range in proxy side still overlaps. + r1->mutState().set_state(raft_serverpb::PeerState::Applying); + ASSERT_EQ(proxy_helper->getRegionLocalState(1).state(), raft_serverpb::PeerState::Applying); + EXPECT_THROW( + proxy_instance + ->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false), + Exception); + + + LOG_INFO(log, "Shrink region 1"); + // region_state is "applying", but the key-range in proxy side is not overlap. + r1->mutState().set_state(raft_serverpb::PeerState::Applying); + r1->mutState().mutable_region()->set_start_key(RecordKVFormat::genKey(table_id, 0)); + r1->mutState().mutable_region()->set_end_key(RecordKVFormat::genKey(table_id, 1)); + proxy_instance + ->snapshot(kvs, ctx.getTMTContext(), 2, {default_cf}, make_meta(), peer_id, 0, 0, std::nullopt, false); + } +} +CATCH + } // namespace DB::tests