From 640c1033cdfe29264df722c0a512ecd4b32c4de6 Mon Sep 17 00:00:00 2001 From: hehechen Date: Wed, 22 Jun 2022 18:58:37 +0800 Subject: [PATCH] optimize ps v3 restore (#5163) ref pingcap/tiflash#4914 --- dbms/src/Storages/Page/V3/PageDirectory.cpp | 20 ++++++++++++------- dbms/src/Storages/Page/V3/PageDirectory.h | 8 +++++--- .../Storages/Page/V3/PageDirectoryFactory.cpp | 6 ++++-- .../Page/V3/tests/gtest_page_directory.cpp | 4 ++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 5eb275f5af5..951da42de1c 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -478,7 +478,7 @@ PageSize VersionedPageEntries::getEntriesByBlobIds( bool VersionedPageEntries::cleanOutdatedEntries( UInt64 lowest_seq, std::map> * normal_entries_to_deref, - PageEntriesV3 & entries_removed, + PageEntriesV3 * entries_removed, const PageLock & /*page_lock*/) { if (type == EditRecordType::VAR_EXTERNAL) @@ -541,7 +541,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( { if (iter->second.being_ref_count == 1) { - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } // The `being_ref_count` for this version is valid. While for older versions, @@ -551,7 +554,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( else { // else there are newer "entry" in the version list, the outdated entries should be removed - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } } @@ -564,7 +570,7 @@ bool VersionedPageEntries::cleanOutdatedEntries( return entries.empty() || (entries.size() == 1 && entries.begin()->second.isDelete()); } -bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 & entries_removed) +bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 * entries_removed) { auto page_lock = acquireLock(); if (type == EditRecordType::VAR_EXTERNAL) @@ -1239,7 +1245,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W return done_any_io; } -PageEntriesV3 PageDirectory::gcInMemEntries() +PageEntriesV3 PageDirectory::gcInMemEntries(bool return_removed_entries) { UInt64 lowest_seq = sequence.load(); @@ -1303,7 +1309,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries() const bool all_deleted = iter->second->cleanOutdatedEntries( lowest_seq, &normal_entries_to_deref, - all_del_entries, + return_removed_entries ? &all_del_entries : nullptr, iter->second->acquireLock()); { @@ -1342,7 +1348,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries() page_id, /*deref_ver=*/deref_counter.first, /*deref_count=*/deref_counter.second, - all_del_entries); + return_removed_entries ? &all_del_entries : nullptr); if (all_deleted) { diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index bd7c433022f..2f0f09f4e42 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -223,14 +223,14 @@ class VersionedPageEntries bool cleanOutdatedEntries( UInt64 lowest_seq, std::map> * normal_entries_to_deref, - PageEntriesV3 & entries_removed, + PageEntriesV3 * entries_removed, const PageLock & page_lock); bool derefAndClean( UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, Int64 deref_count, - PageEntriesV3 & entries_removed); + PageEntriesV3 * entries_removed); void collapseTo(UInt64 seq, PageIdV3Internal page_id, PageEntriesEdit & edit); @@ -360,7 +360,9 @@ class PageDirectory bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr); - PageEntriesV3 gcInMemEntries(); + // Perform a GC for in-memory entries and return the removed entries. + // If `return_removed_entries` is false, then just return an empty set. + PageEntriesV3 gcInMemEntries(bool return_removed_entries = true); std::set getAliveExternalIds(NamespaceId ns_id) const; diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 483c5073ab5..968049a3273 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -44,7 +44,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. - dir->gcInMemEntries(); + // It's no need to remove the expired entries in BlobStore, so skip filling removed_entries to imporve performance. + dir->gcInMemEntries(/*return_removed_entries=*/false); LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); if (blob_stats) @@ -84,7 +85,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromEdit(String storage_name, FileP // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. - dir->gcInMemEntries(); + // It's no need to remove the expired entries in BlobStore when restore, so no need to fill removed_entries. + dir->gcInMemEntries(/*return_removed_entries=*/false); if (blob_stats) { diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 83e07f75d37..6d6ef41630f 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -644,14 +644,14 @@ class VersionedEntriesTest : public ::testing::Test { DerefCounter deref_counter; PageEntriesV3 removed_entries; - bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, removed_entries, entries.acquireLock()); + bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, &removed_entries, entries.acquireLock()); return {all_removed, removed_entries, deref_counter}; } std::tuple runDeref(UInt64 seq, PageVersion ver, Int64 decrease_num) { PageEntriesV3 removed_entries; - bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, removed_entries); + bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, &removed_entries); return {all_removed, removed_entries}; }