diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 6487cd33921..d8c20b40c9f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -164,6 +165,10 @@ struct ContextShared IORateLimiter io_rate_limiter; PageStorageRunMode storage_run_mode = PageStorageRunMode::ONLY_V3; DM::GlobalStoragePoolPtr global_storage_pool; + + /// The PS instance available on Write Node. + UniversalPageStorageServicePtr ps_write; + TiFlashSecurityConfigPtr security_config; /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. @@ -1545,22 +1550,35 @@ static bool isPageStorageV2Existed(const PathPool & path_pool) static bool isPageStorageV3Existed(const PathPool & path_pool) { + const std::vector path_prefixes = { + PathPool::log_path_prefix, + PathPool::data_path_prefix, + PathPool::meta_path_prefix, + PathPool::kvstore_path_prefix, + }; for (const auto & path : path_pool.listGlobalPagePaths()) { - Poco::File dir(path); - if (!dir.exists()) - continue; - - std::vector files; - dir.list(files); - if (!files.empty()) + for (const auto & path_prefix : path_prefixes) { - return true; + Poco::File dir(path + "/" + path_prefix); + if (dir.exists()) + return true; } } return false; } +static bool isWriteNodeUniPSExisted(const PathPool & path_pool) +{ + for (const auto & path : path_pool.listGlobalPagePaths()) + { + Poco::File dir(path + "/" + PathPool::write_uni_path_prefix); + if (dir.exists()) + return true; + } + return false; +} + void Context::initializePageStorageMode(const PathPool & path_pool, UInt64 storage_page_format_version) { auto lock = getLock(); @@ -1577,21 +1595,36 @@ void Context::initializePageStorageMode(const PathPool & path_pool, UInt64 stora case PageFormat::V1: case PageFormat::V2: { - if (isPageStorageV3Existed(path_pool)) + if 
(isPageStorageV3Existed(path_pool) || isWriteNodeUniPSExisted(path_pool)) { - throw Exception("Invalid config `storage.format_version`, Current page V3 data exist. But using the PageFormat::V2." + throw Exception("Invalid config `storage.format_version`, newer format page data exist. But using the PageFormat::V2. " "If you are downgrading the format_version for this TiFlash node, you need to rebuild the data from scratch.", ErrorCodes::LOGICAL_ERROR); } - // not exist V3 + // no newer format page data exists, safe to run with V2 only shared->storage_run_mode = PageStorageRunMode::ONLY_V2; return; } case PageFormat::V3: { + if (isWriteNodeUniPSExisted(path_pool)) + { + throw Exception("Invalid config `storage.format_version`, newer format page data exist. But using the PageFormat::V3. " + "If you are downgrading the format_version for this TiFlash node, you need to rebuild the data from scratch.", + ErrorCodes::LOGICAL_ERROR); + } shared->storage_run_mode = isPageStorageV2Existed(path_pool) ? PageStorageRunMode::MIX_MODE : PageStorageRunMode::ONLY_V3; return; } + case PageFormat::V4: + { + if (isPageStorageV2Existed(path_pool) || isPageStorageV3Existed(path_pool)) + { + throw Exception("Uni PS can only be enabled on a fresh start", ErrorCodes::LOGICAL_ERROR); + } + shared->storage_run_mode = PageStorageRunMode::UNI_PS; + return; + } default: throw Exception(fmt::format("Can't detect the format version of Page [page_version={}]", storage_page_format_version), ErrorCodes::LOGICAL_ERROR); @@ -1617,6 +1650,7 @@ bool Context::initializeGlobalStoragePoolIfNeed(const PathPool & path_pool) { // GlobalStoragePool may be initialized many times in some test cases for restore. 
LOG_WARNING(shared->log, "GlobalStoragePool has already been initialized."); + shared->global_storage_pool->shutdown(); } CurrentMetrics::set(CurrentMetrics::GlobalStorageRunMode, static_cast(shared->storage_run_mode)); if (shared->storage_run_mode == PageStorageRunMode::MIX_MODE || shared->storage_run_mode == PageStorageRunMode::ONLY_V3) @@ -1646,6 +1680,43 @@ DM::GlobalStoragePoolPtr Context::getGlobalStoragePool() const return shared->global_storage_pool; } +void Context::initializeWriteNodePageStorageIfNeed(const PathPool & path_pool) +{ + auto lock = getLock(); + if (shared->storage_run_mode == PageStorageRunMode::UNI_PS) + { + if (shared->ps_write) + { + // The write-node UniversalPageStorage may be initialized many times in some test cases for restore; the existing service is replaced below. + LOG_WARNING(shared->log, "GlobalUniversalPageStorage(WriteNode) has already been initialized."); + } + PageStorageConfig config; + shared->ps_write = UniversalPageStorageService::create( // + *this, + "write", + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::write_uni_path_prefix), + config); + LOG_INFO(shared->log, "initialized GlobalUniversalPageStorage(WriteNode)"); + } + else + { + shared->ps_write = nullptr; + } +} + +UniversalPageStoragePtr Context::getWriteNodePageStorage() const +{ + auto lock = getLock(); + if (shared->ps_write) + { + return shared->ps_write->getUniversalPageStorage(); + } + else + { + return nullptr; + } +} + UInt16 Context::getTCPPort() const { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 9ff6aa2500c..9c98aa7a7b4 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -123,6 +123,9 @@ using Dependencies = std::vector; using TableAndCreateAST = std::pair; using TableAndCreateASTs = std::map; +class UniversalPageStorage; +using UniversalPageStoragePtr = std::shared_ptr; + /** A set of known objects that can be used in the query. 
* Consists of a shared part (always common to all sessions and queries) * and copied part (which can be its own for each session or query). @@ -426,6 +429,9 @@ class Context bool initializeGlobalStoragePoolIfNeed(const PathPool & path_pool); DM::GlobalStoragePoolPtr getGlobalStoragePool() const; + void initializeWriteNodePageStorageIfNeed(const PathPool & path_pool); + UniversalPageStoragePtr getWriteNodePageStorage() const; + /// Call after initialization before using system logs. Call for global context. void initializeSystemLogs(); diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 3867e27a9cc..54d71e322e7 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -1104,6 +1104,8 @@ int Server::main(const std::vector & /*args*/) global_context->initializeGlobalStoragePoolIfNeed(global_context->getPathPool()); LOG_INFO(log, "Global PageStorage run mode is {}", static_cast(global_context->getPageStorageRunMode())); + global_context->initializeWriteNodePageStorageIfNeed(global_context->getPathPool()); + /// Initialize RateLimiter. 
global_context->initializeRateLimiter(config(), bg_pool, blockable_bg_pool); diff --git a/dbms/src/Server/tests/gtest_server_config.cpp b/dbms/src/Server/tests/gtest_server_config.cpp index c493260cd45..5b5da364df6 100644 --- a/dbms/src/Server/tests/gtest_server_config.cpp +++ b/dbms/src/Server/tests/gtest_server_config.cpp @@ -369,6 +369,11 @@ dt_open_file_max_idle_seconds = 20 dt_page_gc_low_write_prob = 0.2 )"}; auto & global_ctx = TiFlashTestEnv::getGlobalContext(); + if (global_ctx.getPageStorageRunMode() == PageStorageRunMode::UNI_PS) + { + // don't support reload uni ps config through region persister + return; + } auto & global_path_pool = global_ctx.getPathPool(); RegionManager region_manager; RegionPersister persister(global_ctx, region_manager); @@ -445,6 +450,11 @@ dt_page_gc_low_write_prob = 0.2 )"}; auto & global_ctx = TiFlashTestEnv::getGlobalContext(); + if (global_ctx.getPageStorageRunMode() == PageStorageRunMode::UNI_PS) + { + // don't support reload uni ps config through storage pool + return; + } std::unique_ptr path_pool = std::make_unique(global_ctx.getPathPool().withTable("test", "t1", false)); std::unique_ptr storage_pool = std::make_unique(global_ctx, /*ns_id*/ 100, *path_pool, "test.t1"); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index e8e13f5be2e..a5ed476ad7b 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -309,7 +309,7 @@ SegmentPtr Segment::restoreSegment( // return segment; } -void Segment::serialize(WriteBatch & wb) +void Segment::serialize(WriteBatchWrapper & wb) { MemoryWriteBuffer buf(0, SEGMENT_BUFFER_SIZE); writeIntBinary(STORAGE_FORMAT_CURRENT.segment, buf); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index f728266308b..e1234761b28 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -143,7 +143,7 @@ class Segment static 
SegmentPtr restoreSegment(const LoggerPtr & parent_log, DMContext & context, PageIdU64 segment_id); - void serialize(WriteBatch & wb); + void serialize(WriteBatchWrapper & wb); /// Attach a new ColumnFile into the Segment. The ColumnFile will be added to MemFileSet and flushed to disk later. /// The block data of the passed in ColumnFile should be placed on disk before calling this function. diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 06d5676d7b5..01fa116e863 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -73,7 +73,7 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang this->files = files_; } -void StableValueSpace::saveMeta(WriteBatch & meta_wb) +void StableValueSpace::saveMeta(WriteBatchWrapper & meta_wb) { MemoryWriteBuffer buf(0, 8192); writeIntBinary(STORAGE_FORMAT_CURRENT.stable, buf); diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index a74d0860949..bee8201b554 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -62,7 +62,7 @@ class StableValueSpace : public std::enable_shared_from_this void setFiles(const DMFiles & files_, const RowKeyRange & range, DMContext * dm_context = nullptr); PageIdU64 getId() const { return id; } - void saveMeta(WriteBatch & meta_wb); + void saveMeta(WriteBatchWrapper & meta_wb); size_t getRows() const; size_t getBytes() const; diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 02281a29152..d3840e81fcc 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -48,12 +49,6 @@ extern const char force_set_dtfile_exist_when_acquire_id[]; 
namespace DM { -enum class StorageType -{ - Log = 1, - Data = 2, - Meta = 3, -}; PageStorageConfig extractConfig(const Settings & settings, StorageType subtype) { @@ -88,19 +83,19 @@ PageStorageConfig extractConfig(const Settings & settings, StorageType subtype) GlobalStoragePool::GlobalStoragePool(const PathPool & path_pool, Context & global_ctx, const Settings & settings) : log_storage(PageStorage::create("__global__.log", - path_pool.getPSDiskDelegatorGlobalMulti("log"), + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::log_path_prefix), extractConfig(settings, StorageType::Log), global_ctx.getFileProvider(), global_ctx, true)) , data_storage(PageStorage::create("__global__.data", - path_pool.getPSDiskDelegatorGlobalMulti("data"), + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::data_path_prefix), extractConfig(settings, StorageType::Data), global_ctx.getFileProvider(), global_ctx, true)) , meta_storage(PageStorage::create("__global__.meta", - path_pool.getPSDiskDelegatorGlobalMulti("meta"), + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::meta_path_prefix), extractConfig(settings, StorageType::Meta), global_ctx.getFileProvider(), global_ctx, @@ -182,6 +177,7 @@ StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPo , run_mode(global_ctx.getPageStorageRunMode()) , ns_id(ns_id_) , storage_path_pool(storage_path_pool_) + , uni_ps(global_ctx.getWriteNodePageStorage()) , global_context(global_ctx) , storage_pool_metrics(CurrentMetrics::StoragePoolV3Only, 0) { @@ -205,13 +201,13 @@ StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPo extractConfig(global_context.getSettingsRef(), StorageType::Meta), global_context.getFileProvider(), global_context); - log_storage_reader = std::make_shared(run_mode, ns_id, log_storage_v2, /*storage_v3_*/ nullptr, nullptr); - data_storage_reader = std::make_shared(run_mode, ns_id, data_storage_v2, /*storage_v3_*/ nullptr, nullptr); - meta_storage_reader = 
std::make_shared(run_mode, ns_id, meta_storage_v2, /*storage_v3_*/ nullptr, nullptr); + log_storage_reader = std::make_shared(run_mode, StorageType::Log, ns_id, log_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr, nullptr); + data_storage_reader = std::make_shared(run_mode, StorageType::Data, ns_id, data_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr, nullptr); + meta_storage_reader = std::make_shared(run_mode, StorageType::Meta, ns_id, meta_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr, nullptr); - log_storage_writer = std::make_shared(run_mode, log_storage_v2, /*storage_v3_*/ nullptr); - data_storage_writer = std::make_shared(run_mode, data_storage_v2, /*storage_v3_*/ nullptr); - meta_storage_writer = std::make_shared(run_mode, meta_storage_v2, /*storage_v3_*/ nullptr); + log_storage_writer = std::make_shared(run_mode, StorageType::Log, log_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr); + data_storage_writer = std::make_shared(run_mode, StorageType::Data, data_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr); + meta_storage_writer = std::make_shared(run_mode, StorageType::Meta, meta_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr); break; } case PageStorageRunMode::ONLY_V3: @@ -221,13 +217,13 @@ StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPo data_storage_v3 = global_storage_pool->data_storage; meta_storage_v3 = global_storage_pool->meta_storage; - log_storage_reader = std::make_shared(run_mode, ns_id, /*storage_v2_*/ nullptr, log_storage_v3, nullptr); - data_storage_reader = std::make_shared(run_mode, ns_id, /*storage_v2_*/ nullptr, data_storage_v3, nullptr); - meta_storage_reader = std::make_shared(run_mode, ns_id, /*storage_v2_*/ nullptr, meta_storage_v3, nullptr); + log_storage_reader = std::make_shared(run_mode, StorageType::Log, ns_id, /*storage_v2_*/ nullptr, log_storage_v3, /*uni_ps_*/ nullptr, nullptr); + data_storage_reader = std::make_shared(run_mode, 
StorageType::Data, ns_id, /*storage_v2_*/ nullptr, data_storage_v3, /*uni_ps_*/ nullptr, nullptr); + meta_storage_reader = std::make_shared(run_mode, StorageType::Meta, ns_id, /*storage_v2_*/ nullptr, meta_storage_v3, /*uni_ps_*/ nullptr, nullptr); - log_storage_writer = std::make_shared(run_mode, /*storage_v2_*/ nullptr, log_storage_v3); - data_storage_writer = std::make_shared(run_mode, /*storage_v2_*/ nullptr, data_storage_v3); - meta_storage_writer = std::make_shared(run_mode, /*storage_v2_*/ nullptr, meta_storage_v3); + log_storage_writer = std::make_shared(run_mode, StorageType::Log, /*storage_v2_*/ nullptr, log_storage_v3, /*uni_ps_*/ nullptr); + data_storage_writer = std::make_shared(run_mode, StorageType::Data, /*storage_v2_*/ nullptr, data_storage_v3, /*uni_ps_*/ nullptr); + meta_storage_writer = std::make_shared(run_mode, StorageType::Meta, /*storage_v2_*/ nullptr, meta_storage_v3, /*uni_ps_*/ nullptr); break; } case PageStorageRunMode::MIX_MODE: @@ -277,13 +273,24 @@ StoragePool::StoragePool(Context & global_ctx, NamespaceId ns_id_, StoragePathPo /* no_more_write_to_v2 */ true); } - log_storage_reader = std::make_shared(run_mode, ns_id, log_storage_v2, log_storage_v3, nullptr); - data_storage_reader = std::make_shared(run_mode, ns_id, data_storage_v2, data_storage_v3, nullptr); - meta_storage_reader = std::make_shared(run_mode, ns_id, meta_storage_v2, meta_storage_v3, nullptr); + log_storage_reader = std::make_shared(run_mode, StorageType::Log, ns_id, log_storage_v2, log_storage_v3, /*uni_ps_*/ nullptr, nullptr); + data_storage_reader = std::make_shared(run_mode, StorageType::Data, ns_id, data_storage_v2, data_storage_v3, /*uni_ps_*/ nullptr, nullptr); + meta_storage_reader = std::make_shared(run_mode, StorageType::Meta, ns_id, meta_storage_v2, meta_storage_v3, /*uni_ps_*/ nullptr, nullptr); - log_storage_writer = std::make_shared(run_mode, log_storage_v2, log_storage_v3); - data_storage_writer = std::make_shared(run_mode, data_storage_v2, 
data_storage_v3); - meta_storage_writer = std::make_shared(run_mode, meta_storage_v2, meta_storage_v3); + log_storage_writer = std::make_shared(run_mode, StorageType::Log, log_storage_v2, log_storage_v3, /*uni_ps_*/ nullptr); + data_storage_writer = std::make_shared(run_mode, StorageType::Data, data_storage_v2, data_storage_v3, /*uni_ps_*/ nullptr); + meta_storage_writer = std::make_shared(run_mode, StorageType::Meta, meta_storage_v2, meta_storage_v3, /*uni_ps_*/ nullptr); + break; + } + case PageStorageRunMode::UNI_PS: + { + log_storage_reader = std::make_shared(run_mode, StorageType::Log, ns_id, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps, nullptr); + data_storage_reader = std::make_shared(run_mode, StorageType::Data, ns_id, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps, nullptr); + meta_storage_reader = std::make_shared(run_mode, StorageType::Meta, ns_id, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps, nullptr); + + log_storage_writer = std::make_shared(run_mode, StorageType::Log, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps); + data_storage_writer = std::make_shared(run_mode, StorageType::Data, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps); + meta_storage_writer = std::make_shared(run_mode, StorageType::Meta, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps); break; } default: @@ -297,8 +304,8 @@ void StoragePool::forceTransformMetaV2toV3() throw Exception(fmt::format("Transform meta must run under mix mode [run_mode={}]", static_cast(run_mode))); assert(meta_storage_v2 != nullptr); assert(meta_storage_v3 != nullptr); - auto meta_transform_storage_writer = std::make_shared(run_mode, meta_storage_v2, meta_storage_v3); - auto meta_transform_storage_reader = std::make_shared(run_mode, ns_id, meta_storage_v2, meta_storage_v3, nullptr); + auto meta_transform_storage_writer = std::make_shared(run_mode, StorageType::Meta, meta_storage_v2, meta_storage_v3, /*uni_ps_*/ nullptr); + auto 
meta_transform_storage_reader = std::make_shared(run_mode, StorageType::Meta, ns_id, meta_storage_v2, meta_storage_v3, /*uni_ps_*/ nullptr, nullptr); Pages pages_transform = {}; auto meta_transform_acceptor = [&](const DB::Page & page) { @@ -333,7 +340,7 @@ void StoragePool::forceTransformMetaV2toV3() } // Will rewrite into V3. - meta_transform_storage_writer->write(std::move(write_batch_transform), nullptr); + meta_transform_storage_writer->writeIntoV3(std::move(write_batch_transform), nullptr); // DEL must call after rewrite. meta_transform_storage_writer->writeIntoV2(std::move(write_batch_del_v2), nullptr); @@ -351,7 +358,7 @@ void StoragePool::forceTransformDataV2toV3() throw Exception(fmt::format("Transform meta must run under mix mode [run_mode={}]", static_cast(run_mode))); assert(data_storage_v2 != nullptr); assert(data_storage_v3 != nullptr); - auto data_transform_storage_writer = std::make_shared(run_mode, data_storage_v2, data_storage_v3); + auto data_transform_storage_writer = std::make_shared(run_mode, StorageType::Data, data_storage_v2, data_storage_v3, /*uni_ps_*/ nullptr); auto snapshot = data_storage_v2->getSnapshot("transformDataV2toV3"); auto * v2_snap = toV2ConcreteSnapshot(snapshot); @@ -498,13 +505,13 @@ PageStorageRunMode StoragePool::restore() meta_storage_v2 = nullptr; // Must init by PageStorageRunMode::ONLY_V3 - log_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, ns_id, /*storage_v2_*/ nullptr, log_storage_v3, nullptr); - data_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, ns_id, /*storage_v2_*/ nullptr, data_storage_v3, nullptr); - meta_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, ns_id, /*storage_v2_*/ nullptr, meta_storage_v3, nullptr); + log_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Log, ns_id, /*storage_v2_*/ nullptr, log_storage_v3, /*uni_ps_*/ nullptr, nullptr); + data_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Data, 
ns_id, /*storage_v2_*/ nullptr, data_storage_v3, /*uni_ps_*/ nullptr, nullptr); + meta_storage_reader = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Meta, ns_id, /*storage_v2_*/ nullptr, meta_storage_v3, /*uni_ps_*/ nullptr, nullptr); - log_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, log_storage_v3); - data_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, data_storage_v3); - meta_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, meta_storage_v3); + log_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Log, /*storage_v2_*/ nullptr, log_storage_v3, /*uni_ps_*/ nullptr); + data_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Data, /*storage_v2_*/ nullptr, data_storage_v3, /*uni_ps_*/ nullptr); + meta_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::Meta, /*storage_v2_*/ nullptr, meta_storage_v3, /*uni_ps_*/ nullptr); max_log_page_id = log_storage_v3->getMaxId(); max_data_page_id = data_storage_v3->getMaxId(); @@ -522,6 +529,13 @@ PageStorageRunMode StoragePool::restore() } break; } + case PageStorageRunMode::UNI_PS: + { + max_log_page_id = uni_ps->getMaxIdAfterRestart(); + max_data_page_id = uni_ps->getMaxIdAfterRestart(); + max_meta_page_id = uni_ps->getMaxIdAfterRestart(); + break; + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } @@ -567,6 +581,16 @@ void StoragePool::startup(ExternalPageCallbacks && callbacks) gc_handle = global_context.getBackgroundPool().addTask([this] { return this->gc(global_context.getSettingsRef()); }); break; } + case PageStorageRunMode::UNI_PS: + { + // For uni ps, the GC is handled by `UniversalPageStorageService`, register callbacks with prefix for this table + UniversalExternalPageCallbacks us_callbacks; + us_callbacks.remover = 
std::move(callbacks.remover); + us_callbacks.scanner = std::move(callbacks.scanner); + us_callbacks.prefix = UniversalPageIdFormat::toFullPrefix(StorageType::Data, ns_id); + uni_ps->registerUniversalExternalPagesCallbacks(us_callbacks); + break; + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } @@ -606,6 +630,11 @@ void StoragePool::shutdown() data_storage_v3->unregisterExternalPagesCallbacks(ns_id); break; } + case PageStorageRunMode::UNI_PS: + { + uni_ps->unregisterUniversalExternalPagesCallbacks(UniversalPageIdFormat::toFullPrefix(StorageType::Data, ns_id)); + break; + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } @@ -633,7 +662,7 @@ bool StoragePool::doV2Gc(const Settings & settings) bool StoragePool::gc(const Settings & settings, const Seconds & try_gc_period) { - if (run_mode == PageStorageRunMode::ONLY_V3) + if (run_mode == PageStorageRunMode::ONLY_V3 || run_mode == PageStorageRunMode::UNI_PS) return false; { @@ -697,19 +726,27 @@ PageIdU64 StoragePool::newDataPageIdForDTFile(StableDiskDelegator & delegator, c } template -inline static PageReader newReader(const PageStorageRunMode run_mode, const NamespaceId ns_id, T & storage_v2, T & storage_v3, ReadLimiterPtr read_limiter, bool snapshot_read, const String & tracing_id) +inline static PageReader newReader(const PageStorageRunMode run_mode, StorageType tag, const NamespaceId ns_id, T & storage_v2, T & storage_v3, UniversalPageStoragePtr uni_ps, ReadLimiterPtr read_limiter, bool snapshot_read, const String & tracing_id) { switch (run_mode) { case PageStorageRunMode::ONLY_V2: - return PageReader(run_mode, ns_id, storage_v2, nullptr, snapshot_read ? storage_v2->getSnapshot(tracing_id) : nullptr, read_limiter); + return PageReader(run_mode, tag, ns_id, storage_v2, nullptr, /*uni_ps*/ nullptr, snapshot_read ? 
storage_v2->getSnapshot(tracing_id) : nullptr, read_limiter); case PageStorageRunMode::ONLY_V3: - return PageReader(run_mode, ns_id, nullptr, storage_v3, snapshot_read ? storage_v3->getSnapshot(tracing_id) : nullptr, read_limiter); + return PageReader(run_mode, tag, ns_id, nullptr, storage_v3, /*uni_ps*/ nullptr, snapshot_read ? storage_v3->getSnapshot(tracing_id) : nullptr, read_limiter); case PageStorageRunMode::MIX_MODE: - return PageReader(run_mode, ns_id, storage_v2, storage_v3, snapshot_read ? std::make_shared(storage_v2->getSnapshot(fmt::format("{}-v2", tracing_id)), // - storage_v3->getSnapshot(fmt::format("{}-v3", tracing_id))) - : nullptr, - read_limiter); + return PageReader( + run_mode, + tag, + ns_id, + storage_v2, + storage_v3, + /*uni_ps*/ nullptr, + snapshot_read ? std::make_shared(storage_v2->getSnapshot(fmt::format("{}-v2", tracing_id)), storage_v3->getSnapshot(fmt::format("{}-v3", tracing_id))) + : nullptr, + read_limiter); + case PageStorageRunMode::UNI_PS: + return PageReader(run_mode, tag, ns_id, nullptr, nullptr, uni_ps, snapshot_read ? 
uni_ps->getSnapshot(tracing_id) : nullptr, read_limiter); default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } @@ -717,32 +754,32 @@ inline static PageReader newReader(const PageStorageRunMode run_mode, const Name PageReader StoragePool::newLogReader(ReadLimiterPtr read_limiter, bool snapshot_read, const String & tracing_id) { - return newReader(run_mode, ns_id, log_storage_v2, log_storage_v3, read_limiter, snapshot_read, tracing_id); + return newReader(run_mode, StorageType::Log, ns_id, log_storage_v2, log_storage_v3, uni_ps, read_limiter, snapshot_read, tracing_id); } PageReader StoragePool::newLogReader(ReadLimiterPtr read_limiter, PageStorage::SnapshotPtr & snapshot) { - return PageReader(run_mode, ns_id, log_storage_v2, log_storage_v3, snapshot, read_limiter); + return PageReader(run_mode, StorageType::Log, ns_id, log_storage_v2, log_storage_v3, uni_ps, snapshot, read_limiter); } PageReader StoragePool::newDataReader(ReadLimiterPtr read_limiter, bool snapshot_read, const String & tracing_id) { - return newReader(run_mode, ns_id, data_storage_v2, data_storage_v3, read_limiter, snapshot_read, tracing_id); + return newReader(run_mode, StorageType::Data, ns_id, data_storage_v2, data_storage_v3, uni_ps, read_limiter, snapshot_read, tracing_id); } PageReader StoragePool::newDataReader(ReadLimiterPtr read_limiter, PageStorage::SnapshotPtr & snapshot) { - return PageReader(run_mode, ns_id, data_storage_v2, data_storage_v3, snapshot, read_limiter); + return PageReader(run_mode, StorageType::Data, ns_id, data_storage_v2, data_storage_v3, uni_ps, snapshot, read_limiter); } PageReader StoragePool::newMetaReader(ReadLimiterPtr read_limiter, bool snapshot_read, const String & tracing_id) { - return newReader(run_mode, ns_id, meta_storage_v2, meta_storage_v3, read_limiter, snapshot_read, tracing_id); + return newReader(run_mode, StorageType::Meta, ns_id, meta_storage_v2, meta_storage_v3, uni_ps, 
read_limiter, snapshot_read, tracing_id); } PageReader StoragePool::newMetaReader(ReadLimiterPtr read_limiter, PageStorage::SnapshotPtr & snapshot) { - return PageReader(run_mode, ns_id, meta_storage_v2, meta_storage_v3, snapshot, read_limiter); + return PageReader(run_mode, StorageType::Meta, ns_id, meta_storage_v2, meta_storage_v3, uni_ps, snapshot, read_limiter); } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index c6cf7f537a2..c6e68695664 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -194,6 +194,8 @@ class StoragePool : private boost::noncopyable PageStoragePtr data_storage_v3; PageStoragePtr meta_storage_v3; + UniversalPageStoragePtr uni_ps; + PageReaderPtr log_storage_reader; PageReaderPtr data_storage_reader; PageReaderPtr meta_storage_reader; diff --git a/dbms/src/Storages/DeltaMerge/WriteBatches.h b/dbms/src/Storages/DeltaMerge/WriteBatches.h index bfdc4b1b08d..f7907b543bf 100644 --- a/dbms/src/Storages/DeltaMerge/WriteBatches.h +++ b/dbms/src/Storages/DeltaMerge/WriteBatches.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace DB { @@ -25,16 +25,17 @@ namespace DM struct WriteBatches : private boost::noncopyable { NamespaceId ns_id; - WriteBatch log; - WriteBatch data; - WriteBatch meta; + PageStorageRunMode run_mode; + WriteBatchWrapper log; + WriteBatchWrapper data; + WriteBatchWrapper meta; PageIdU64s written_log; PageIdU64s written_data; - WriteBatch removed_log; - WriteBatch removed_data; - WriteBatch removed_meta; + WriteBatchWrapper removed_log; + WriteBatchWrapper removed_data; + WriteBatchWrapper removed_meta; StoragePool & storage_pool; bool should_roll_back = false; @@ -43,22 +44,22 @@ struct WriteBatches : private boost::noncopyable explicit WriteBatches(StoragePool & storage_pool_, const WriteLimiterPtr & write_limiter_ = nullptr) : ns_id(storage_pool_.getNamespaceId()) - , log(ns_id) - , 
data(ns_id) - , meta(ns_id) - , removed_log(ns_id) - , removed_data(ns_id) - , removed_meta(ns_id) + , run_mode(storage_pool_.getPageStorageRunMode()) + , log(run_mode, StorageType::Log, ns_id) + , data(run_mode, StorageType::Data, ns_id) + , meta(run_mode, StorageType::Meta, ns_id) + , removed_log(run_mode, StorageType::Log, ns_id) + , removed_data(run_mode, StorageType::Data, ns_id) + , removed_meta(run_mode, StorageType::Meta, ns_id) , storage_pool(storage_pool_) , write_limiter(write_limiter_) - { - } + {} ~WriteBatches() { if constexpr (DM_RUN_CHECK) { - auto check_empty = [&](const WriteBatch & wb, const String & name) { + auto check_empty = [&](const WriteBatchWrapper & wb, const String & name) { if (!wb.empty()) { StackTrace trace; @@ -86,11 +87,9 @@ struct WriteBatches : private boost::noncopyable void writeLogAndData() { - PageIdU64s log_write_pages, data_write_pages; - if constexpr (DM_RUN_CHECK) { - auto check = [](const WriteBatch & wb, const String & what) { + auto check = [](const auto & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -100,15 +99,43 @@ struct WriteBatches : private boost::noncopyable } LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - - check(log, "log"); - check(data, "data"); + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + { + check(log.getUniversalWriteBatch(), "log"); + check(data.getUniversalWriteBatch(), "data"); + break; + } + default: + { + check(log.getWriteBatch(), "log"); + check(data.getWriteBatch(), "data"); + break; + } + } } - for (auto & w : log.getWrites()) - log_write_pages.push_back(w.page_id); - for (auto & w : data.getWrites()) - data_write_pages.push_back(w.page_id); + PageIdU64s log_write_pages, data_write_pages; + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + { + for (const auto & w : log.getUniversalWriteBatch().getWrites()) + log_write_pages.push_back(UniversalPageIdFormat::getU64ID(w.page_id)); + for (const auto & 
w : data.getUniversalWriteBatch().getWrites()) + data_write_pages.push_back(UniversalPageIdFormat::getU64ID(w.page_id)); + break; + } + default: + { + for (const auto & w : log.getWriteBatch().getWrites()) + log_write_pages.push_back(w.page_id); + for (const auto & w : data.getWriteBatch().getWrites()) + data_write_pages.push_back(w.page_id); + break; + } + } storage_pool.logWriter()->write(std::move(log), write_limiter); storage_pool.dataWriter()->write(std::move(data), write_limiter); @@ -124,16 +151,16 @@ struct WriteBatches : private boost::noncopyable void rollbackWrittenLogAndData() { - WriteBatch log_wb(ns_id); + WriteBatchWrapper log_wb(run_mode, StorageType::Log, ns_id); for (auto p : written_log) log_wb.delPage(p); - WriteBatch data_wb(ns_id); + WriteBatchWrapper data_wb(run_mode, StorageType::Data, ns_id); for (auto p : written_data) data_wb.delPage(p); if constexpr (DM_RUN_CHECK) { - auto check = [](const WriteBatch & wb, const String & what) { + auto check = [](const auto & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -144,8 +171,21 @@ struct WriteBatches : private boost::noncopyable LOG_TRACE(Logger::get(), "Rollback remove from {} : {}", what, wb.toString()); }; - check(log_wb, "log_wb"); - check(data_wb, "data_wb"); + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + { + check(log_wb.getUniversalWriteBatch(), "log_wb"); + check(data_wb.getUniversalWriteBatch(), "data_wb"); + break; + } + default: + { + check(log_wb.getWriteBatch(), "log_wb"); + check(data_wb.getWriteBatch(), "data_wb"); + break; + } + } } storage_pool.logWriter()->write(std::move(log_wb), write_limiter); @@ -159,7 +199,7 @@ struct WriteBatches : private boost::noncopyable { if constexpr (DM_RUN_CHECK) { - auto check = [](const WriteBatch & wb, const String & what) { + auto check = [](const auto & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -169,8 +209,19 @@ struct WriteBatches : 
private boost::noncopyable } LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - - check(meta, "meta"); + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + { + check(meta.getUniversalWriteBatch(), "meta"); + break; + } + default: + { + check(meta.getWriteBatch(), "meta"); + break; + } + } } storage_pool.metaWriter()->write(std::move(meta), write_limiter); @@ -181,7 +232,7 @@ struct WriteBatches : private boost::noncopyable { if constexpr (DM_RUN_CHECK) { - auto check = [](const WriteBatch & wb, const String & what) { + auto check = [](const auto & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -192,9 +243,23 @@ struct WriteBatches : private boost::noncopyable LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - check(removed_log, "removed_log"); - check(removed_data, "removed_data"); - check(removed_meta, "removed_meta"); + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + { + check(removed_log.getUniversalWriteBatch(), "removed_log"); + check(removed_data.getUniversalWriteBatch(), "removed_data"); + check(removed_meta.getUniversalWriteBatch(), "removed_meta"); + break; + } + default: + { + check(removed_log.getWriteBatch(), "removed_log"); + check(removed_data.getWriteBatch(), "removed_data"); + check(removed_meta.getWriteBatch(), "removed_meta"); + break; + } + } } storage_pool.logWriter()->write(std::move(removed_log), write_limiter); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 5d2a3359544..ee8ea8455f6 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -77,6 +77,7 @@ Attr pkAttr() return Attr{col.name, col.id, col.type}; } + bool checkMatch( const String & test_case, Context & context, @@ -87,6 +88,11 @@ bool checkMatch( bool check_pk = false) { String name = 
"DMMinMaxIndexTest_" + test_case; + // We cannot restore tables with the same table id multiple times in a single run. + // Because we don't update max_page_id for PS instance at run time. + // And when restoring table, it will use the max_page_id from PS as the start point for allocating page id. + // So if we restore the same table multiple times in a single run, it may write different data using the same page id. + static int next_table_id = 100; auto clean_up = [&]() { context.dropMinMaxIndexCache(); @@ -108,12 +114,13 @@ bool checkMatch( Block header = toEmptyBlock(table_columns); Block block = genBlock(header, block_tuples); + // max page id is only updated at restart, so we need recreate page v3 before recreate table DeltaMergeStorePtr store = std::make_shared( context, false, "test_database", name, - /*table_id*/ 100, + /*table_id*/ next_table_id++, true, table_columns, getExtraHandleColumnDefine(is_common_handle), diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index 1d55de2e319..e9472512f99 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -731,6 +731,8 @@ try table_info.is_common_handle = false; table_info.pk_is_handle = false; + // max page id is only updated at restart, so we need recreate page v3 before recreate table + ctx.initializeGlobalStoragePoolIfNeed(ctx.getPathPool()); storage = StorageDeltaMerge::create("TiFlash", /* db_name= */ "default", table_name, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp index 147fa71aaf5..b90ad6cafa1 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -213,21 +213,8 @@ try ASSERT_EQ(segments.size(), 1); /// make sure all column file in delta value space is 
deleted - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr || storage_pool->log_storage_v2 != nullptr); - if (storage_pool->log_storage_v3) - { - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); - } - if (storage_pool->log_storage_v2) - { - storage_pool->log_storage_v2->gc(/* not_skip */ true); - storage_pool->data_storage_v2->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v2->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v2->getNumberOfPages(), 1); - } + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); } CATCH @@ -273,21 +260,8 @@ try ASSERT_EQ(segments.size(), 1); /// make sure all column file in delta value space is deleted - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr || storage_pool->log_storage_v2 != nullptr); - if (storage_pool->log_storage_v3) - { - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); - } - if (storage_pool->log_storage_v2) - { - storage_pool->log_storage_v2->gc(/* not_skip */ true); - storage_pool->data_storage_v2->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v2->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v2->getNumberOfPages(), 1); - } + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); } CATCH @@ -333,21 +307,8 @@ try ASSERT_EQ(segments.size(), 1); /// make sure all column file in delta value space is deleted - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr || storage_pool->log_storage_v2 != nullptr); - if 
(storage_pool->log_storage_v3) - { - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); - } - if (storage_pool->log_storage_v2) - { - storage_pool->log_storage_v2->gc(/* not_skip */ true); - storage_pool->data_storage_v2->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v2->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v2->getNumberOfPages(), 1); - } + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); } CATCH @@ -570,23 +531,7 @@ try { /// make sure all column file in delta value space is deleted - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr || storage_pool->log_storage_v2 != nullptr); - if (storage_pool->log_storage_v3) - { - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - EXPECT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - } - if (storage_pool->log_storage_v2) - { - storage_pool->log_storage_v2->gc(/* not_skip */ true); - storage_pool->data_storage_v2->gc(/* not_skip */ true); - EXPECT_EQ(storage_pool->log_storage_v2->getNumberOfPages(), 0); - } - - const auto file_usage = storage_pool->log_storage_reader->getFileUsageStatistics(); - LOG_DEBUG(log, "All delta-merged, log valid size on disk: {}", file_usage.total_valid_size); - EXPECT_EQ(file_usage.total_valid_size, 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); } } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_ingest.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_ingest.cpp index 0a621131485..fdcf9c5f3ed 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_ingest.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_ingest.cpp @@ -67,9 +67,7 @@ try 
ASSERT_EQ(20, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); ASSERT_EQ(22, getSegmentRowNum(*right_seg)); - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); // Current segments: [-∞, 30), [30, +∞) @@ -79,8 +77,7 @@ try ASSERT_EQ(15, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); ASSERT_EQ(22, getSegmentRowNum(*right_seg)); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(2, stable_page_ids.size()); } CATCH @@ -148,9 +145,7 @@ try auto right_seg = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 200, Segment::SplitMode::Logical); ASSERT_EQ(0, getSegmentRowNum(*right_seg)); - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); ASSERT_PROFILE_EVENT(ProfileEvents::DMSegmentIngestDataByReplace, +1, { @@ -160,8 +155,7 @@ try ASSERT_EQ(100, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); // After ingestion, we should have 2 stables. 
- storage_pool->data_storage_v3->gc(/* not_skip */ true); - stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(2, stable_page_ids.size()); } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp index 332ae156372..49c750ecab3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp @@ -79,14 +79,12 @@ try flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); // 1 DMFile + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); // 1 DMFile + PageIdU64 replaced_stable_id{}; { - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); replaced_stable_id = *stable_page_ids.begin(); } @@ -99,15 +97,13 @@ try mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); ASSERT_EQ(47 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + 
ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); auto const stable_files = segments[DELTA_MERGE_FIRST_SEGMENT_ID]->getStable()->getDMFiles(); { // Only the new stable DMFile is alive (and we should have a different DMFile). - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); ASSERT_TRUE(stable_page_ids.count(stable_files[0]->fileId())); ASSERT_FALSE(stable_page_ids.count(replaced_stable_id)); @@ -120,8 +116,7 @@ try } ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); { - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); // The stable before replaceData should be not alive anymore. ASSERT_FALSE(stable_page_ids.count(stable_files[0]->fileId())); @@ -189,7 +184,7 @@ try // Note: we have not yet enabled GC for the dmfile here. ASSERT_FALSE(dm_file->canGC()); { - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsWithoutGC(NAMESPACE_ID); ASSERT_TRUE(stable_page_ids.count(dm_file->fileId())); } @@ -200,8 +195,7 @@ try // Even when the stable is replaced, the DMFile should not be marked as GCable. 
ASSERT_FALSE(dm_file->canGC()); { - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); ASSERT_FALSE(stable_page_ids.count(dm_file->fileId())); } @@ -227,9 +221,7 @@ try writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); } - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); } CATCH @@ -257,12 +249,9 @@ try mergeSegment({DELTA_MERGE_FIRST_SEGMENT_ID, *seg_right_id}); ASSERT_EQ(110, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); - ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->log_storage_v3->gc(/* not_skip */ true); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); - ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(getPageNumAfterGC(StorageType::Log, NAMESPACE_ID), 0); + ASSERT_EQ(getPageNumAfterGC(StorageType::Data, NAMESPACE_ID), 1); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); } CATCH @@ -286,9 +275,7 @@ try auto shared_dm_files = segments[*seg_right_id]->getStable()->getDMFiles(); // As stable is shared in logical split, we should only have 1 alive external file. 
- ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); // Now let's replace one segment. auto block = prepareWriteBlock(0, 300); @@ -298,8 +285,7 @@ try ASSERT_EQ(400, getSegmentRowNumWithoutMVCC(*seg_right_id)); // The previously-shared stable should be still valid. - storage_pool->data_storage_v3->gc(/* not_skip */ true); - stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(2, stable_page_ids.size()); ASSERT_TRUE(stable_page_ids.count(shared_dm_files[0]->fileId())); } @@ -322,8 +308,7 @@ try replaceSegmentData(DELTA_MERGE_FIRST_SEGMENT_ID, block); // There is a snapshot alive, so we should have 2 stables. - storage_pool->data_storage_v3->gc(/* not_skip */ true); - auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + auto stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(2, stable_page_ids.size()); // Continue the read @@ -334,8 +319,7 @@ try // Snapshot is dropped. 
in_stream = {}; - storage_pool->data_storage_v3->gc(/* not_skip */ true); - stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + stable_page_ids = getAliveExternalPageIdsAfterGC(NAMESPACE_ID); ASSERT_EQ(1, stable_page_ids.size()); } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 3d2b60e584c..4aefab3ca5d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -600,6 +600,91 @@ PageIdU64 SegmentTestBasic::getRandomSegmentId() // Complexity is O(n) return segment_id; } +size_t SegmentTestBasic::getPageNumAfterGC(StorageType type, NamespaceId ns_id) const +{ + if (storage_pool->uni_ps) + { + storage_pool->uni_ps->gc(/* not_skip */ true); + return storage_pool->uni_ps->getNumberOfPages(UniversalPageIdFormat::toFullPrefix(type, ns_id)); + } + else + { + assert(storage_pool->log_storage_v3 != nullptr || storage_pool->log_storage_v2 != nullptr); + switch (type) + { + case StorageType::Log: + if (storage_pool->log_storage_v3) + { + storage_pool->log_storage_v3->gc(/* not_skip */ true); + return storage_pool->log_storage_v3->getNumberOfPages(); + } + else + { + storage_pool->log_storage_v2->gc(/* not_skip */ true); + return storage_pool->log_storage_v2->getNumberOfPages(); + } + break; + case StorageType::Data: + if (storage_pool->data_storage_v3) + { + storage_pool->data_storage_v3->gc(/* not_skip */ true); + return storage_pool->data_storage_v3->getNumberOfPages(); + } + else + { + storage_pool->data_storage_v2->gc(/* not_skip */ true); + return storage_pool->data_storage_v2->getNumberOfPages(); + } + break; + default: + throw Exception("", ErrorCodes::NOT_IMPLEMENTED); + } + } +} + +std::set SegmentTestBasic::getAliveExternalPageIdsWithoutGC(NamespaceId ns_id) const +{ + if (storage_pool->uni_ps) + { + return 
*(storage_pool->uni_ps->page_directory->getAliveExternalIds(UniversalPageIdFormat::toFullPrefix(StorageType::Data, ns_id))); + } + else + { + assert(storage_pool->data_storage_v3 != nullptr || storage_pool->data_storage_v2 != nullptr); + if (storage_pool->data_storage_v3) + { + return storage_pool->data_storage_v3->getAliveExternalPageIds(ns_id); + } + else + { + return storage_pool->data_storage_v2->getAliveExternalPageIds(ns_id); + } + } +} + +std::set SegmentTestBasic::getAliveExternalPageIdsAfterGC(NamespaceId ns_id) const +{ + if (storage_pool->uni_ps) + { + storage_pool->uni_ps->gc(/* not_skip */ true); + return *(storage_pool->uni_ps->page_directory->getAliveExternalIds(UniversalPageIdFormat::toFullPrefix(StorageType::Data, ns_id))); + } + else + { + assert(storage_pool->data_storage_v3 != nullptr || storage_pool->data_storage_v2 != nullptr); + if (storage_pool->data_storage_v3) + { + storage_pool->data_storage_v3->gc(/* not_skip */ true); + return storage_pool->data_storage_v3->getAliveExternalPageIds(ns_id); + } + else + { + storage_pool->data_storage_v2->gc(/* not_skip */ true); + return storage_pool->data_storage_v2->getAliveExternalPageIds(ns_id); + } + } +} + SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings) { TiFlashStorageTestBasic::reload(std::move(db_settings)); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h index 481f217eca9..cba5652fdb9 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -99,6 +100,11 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic RowKeyValue buildRowKeyValue(Int64 key); static RowKeyRange buildRowKeyRange(Int64 begin, Int64 end); + size_t 
getPageNumAfterGC(StorageType type, NamespaceId ns_id) const; + + std::set getAliveExternalPageIdsWithoutGC(NamespaceId ns_id) const; + std::set getAliveExternalPageIdsAfterGC(NamespaceId ns_id) const; + protected: std::mt19937 random; diff --git a/dbms/src/Storages/FormatVersion.h b/dbms/src/Storages/FormatVersion.h index dbe3d9ee9bc..79b68d16b49 100644 --- a/dbms/src/Storages/FormatVersion.h +++ b/dbms/src/Storages/FormatVersion.h @@ -69,6 +69,8 @@ inline static constexpr Version V2 = 2; // - If we already have V2 data in disk. It will turn PageStorage into MIX_MODE // - If we don't have any v2 data in disk. It will turn PageStorage into ONLY_V3 inline static constexpr Version V3 = 3; +// Store all data in one ps instance. +inline static constexpr Version V4 = 4; } // namespace PageFormat struct StorageFormatVersion @@ -118,6 +120,15 @@ inline static const StorageFormatVersion STORAGE_FORMAT_V4 = StorageFormatVersio .identifier = 4, }; +inline static const StorageFormatVersion STORAGE_FORMAT_V5 = StorageFormatVersion{ + .segment = SegmentFormat::V2, + .dm_file = DMFileFormat::V2, + .stable = StableFormat::V1, + .delta = DeltaFormat::V3, + .page = PageFormat::V4, // diff + .identifier = 5, +}; + inline StorageFormatVersion STORAGE_FORMAT_CURRENT = STORAGE_FORMAT_V4; inline const StorageFormatVersion & toStorageFormat(UInt64 setting) @@ -132,6 +143,8 @@ inline const StorageFormatVersion & toStorageFormat(UInt64 setting) return STORAGE_FORMAT_V3; case 4: return STORAGE_FORMAT_V4; + case 5: + return STORAGE_FORMAT_V5; default: throw Exception("Illegal setting value: " + DB::toString(setting)); } diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index 49c60b5862b..cf89b9909ac 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -43,9 +44,11 @@ class PageReaderImpl : private boost::noncopyable public: static 
std::unique_ptr create( PageStorageRunMode run_mode_, + StorageType tag_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, const PageStorage::SnapshotPtr & snap_, ReadLimiterPtr read_limiter_); @@ -341,11 +344,114 @@ class PageReaderImplMixed : public PageReaderImpl ReadLimiterPtr read_limiter; }; +class PageReaderImplUniversal : public PageReaderImpl +{ +public: + /// Not snapshot read. + explicit PageReaderImplUniversal(StorageType tag_, NamespaceId ns_id_, UniversalPageStoragePtr storage_, ReadLimiterPtr read_limiter_) + : storage(storage_) + , prefix(UniversalPageIdFormat::toFullPrefix(tag_, ns_id_)) + , read_limiter(read_limiter_) + { + } + + /// Snapshot read. + PageReaderImplUniversal(StorageType tag_, NamespaceId ns_id_, UniversalPageStoragePtr storage_, const PageStorage::SnapshotPtr & snap_, ReadLimiterPtr read_limiter_) + : storage(storage_) + , prefix(UniversalPageIdFormat::toFullPrefix(tag_, ns_id_)) + , snap(snap_) + , read_limiter(read_limiter_) + { + } + + DB::Page read(PageIdU64 page_id) const override + { + return storage->read(UniversalPageIdFormat::toFullPageId(prefix, page_id), read_limiter, snap); + } + + static inline PageMapU64 toPageMap(UniversalPageMap && us_page_map) + { + PageMapU64 page_map; + for (auto & id_and_page : us_page_map) + { + page_map.emplace(UniversalPageIdFormat::getU64ID(id_and_page.first), std::move(id_and_page.second)); + } + return page_map; + } + + PageMapU64 read(const PageIdU64s & page_ids) const override + { + UniversalPageIds us_page_ids; + for (const auto & page_id : page_ids) + { + us_page_ids.emplace_back(UniversalPageIdFormat::toFullPageId(prefix, page_id)); + } + return PageReaderImplUniversal::toPageMap(storage->read(us_page_ids, read_limiter, snap)); + } + + using PageReadFields = PageStorage::PageReadFields; + PageMapU64 read(const std::vector & page_fields) const override + { + std::vector us_page_fields; + 
us_page_fields.reserve(page_fields.size()); + for (const auto & f : page_fields) + { + us_page_fields.emplace_back(UniversalPageIdFormat::toFullPageId(prefix, f.first), f.second); + } + return PageReaderImplUniversal::toPageMap(storage->read(us_page_fields, read_limiter, snap)); + } + + PageIdU64 getNormalPageId(PageIdU64 page_id) const override + { + return UniversalPageIdFormat::getU64ID(storage->getNormalPageId(UniversalPageIdFormat::toFullPageId(prefix, page_id), snap)); + } + + PageEntry getPageEntry(PageIdU64 page_id) const override + { + return storage->getEntry(UniversalPageIdFormat::toFullPageId(prefix, page_id), snap); + } + + PageStorageSnapshotPtr getSnapshot(const String & tracing_id) const override + { + return storage->getSnapshot(tracing_id); + } + + // Get some statistics of all living snapshots and the oldest living snapshot. + SnapshotsStatistics getSnapshotsStat() const override + { + return storage->getSnapshotsStat(); + } + + void traverse(const std::function & acceptor, bool /*only_v2*/, bool /*only_v3*/) const override + { + auto snapshot = storage->getSnapshot(fmt::format("scan_{}", prefix)); + const auto page_ids = storage->page_directory->getAllPageIdsWithPrefix(prefix, snapshot); + for (const auto & page_id : page_ids) + { + const auto page_id_and_entry = storage->page_directory->getByID(page_id, snapshot); + acceptor(storage->blob_store->read(page_id_and_entry)); + } + } + + FileUsageStatistics getFileUsageStatistics() const override + { + return storage->getFileUsageStatistics(); + } + +private: + UniversalPageStoragePtr storage; + String prefix; + PageStorageSnapshotPtr snap; + ReadLimiterPtr read_limiter; +}; + std::unique_ptr PageReaderImpl::create( PageStorageRunMode run_mode_, + StorageType tag_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, const PageStorage::SnapshotPtr & snap_, ReadLimiterPtr read_limiter_) { @@ -363,6 +469,10 @@ std::unique_ptr 
PageReaderImpl::create( { return std::make_unique(ns_id_, storage_v2_, storage_v3_, snap_, read_limiter_); } + case PageStorageRunMode::UNI_PS: + { + return std::make_unique(tag_, ns_id_, uni_ps_, snap_, read_limiter_); + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode_)), ErrorCodes::LOGICAL_ERROR); } @@ -372,14 +482,29 @@ std::unique_ptr PageReaderImpl::create( * PageReader methods * **********************/ /// Not snapshot read. -PageReader::PageReader(const PageStorageRunMode & run_mode_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, ReadLimiterPtr read_limiter_) - : impl(PageReaderImpl::create(run_mode_, ns_id_, storage_v2_, storage_v3_, /*snap_=*/nullptr, read_limiter_)) +PageReader::PageReader( + const PageStorageRunMode & run_mode_, + StorageType tag_, + NamespaceId ns_id_, + PageStoragePtr storage_v2_, + PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, + ReadLimiterPtr read_limiter_) + : impl(PageReaderImpl::create(run_mode_, tag_, ns_id_, storage_v2_, storage_v3_, uni_ps_, /*snap_=*/nullptr, read_limiter_)) { } /// Snapshot read. 
-PageReader::PageReader(const PageStorageRunMode & run_mode_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, PageStorage::SnapshotPtr snap_, ReadLimiterPtr read_limiter_) - : impl(PageReaderImpl::create(run_mode_, ns_id_, storage_v2_, storage_v3_, std::move(snap_), read_limiter_)) +PageReader::PageReader( + const PageStorageRunMode & run_mode_, + StorageType tag_, + NamespaceId ns_id_, + PageStoragePtr storage_v2_, + PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, + PageStorage::SnapshotPtr snap_, + ReadLimiterPtr read_limiter_) + : impl(PageReaderImpl::create(run_mode_, tag_, ns_id_, storage_v2_, storage_v3_, uni_ps_, std::move(snap_), read_limiter_)) { } @@ -436,23 +561,28 @@ void PageReader::traverse(const std::function & acc * PageWriter methods * *********************/ -void PageWriter::write(WriteBatch && write_batch, WriteLimiterPtr write_limiter) const +void PageWriter::write(WriteBatchWrapper && write_batch, WriteLimiterPtr write_limiter) const { switch (run_mode) { case PageStorageRunMode::ONLY_V2: { - writeIntoV2(std::move(write_batch), write_limiter); + writeIntoV2(std::move(write_batch.releaseWriteBatch()), write_limiter); break; } case PageStorageRunMode::ONLY_V3: { - writeIntoV3(std::move(write_batch), write_limiter); + writeIntoV3(std::move(write_batch.releaseWriteBatch()), write_limiter); break; } case PageStorageRunMode::MIX_MODE: { - writeIntoMixMode(std::move(write_batch), write_limiter); + writeIntoMixMode(std::move(write_batch.releaseWriteBatch()), write_limiter); + break; + } + case PageStorageRunMode::UNI_PS: + { + writeIntoUni(std::move(write_batch.releaseUniversalWriteBatch()), write_limiter); break; } } @@ -603,6 +733,10 @@ void PageWriter::writeIntoMixMode(WriteBatch && write_batch, WriteLimiterPtr wri } } +void PageWriter::writeIntoUni(UniversalWriteBatch && write_batch, WriteLimiterPtr write_limiter) const +{ + uni_ps->write(std::move(write_batch), write_limiter); +} PageStorageConfig 
PageWriter::getSettings() const { @@ -617,6 +751,7 @@ PageStorageConfig PageWriter::getSettings() const return storage_v3->getSettings(); } case PageStorageRunMode::MIX_MODE: + case PageStorageRunMode::UNI_PS: { throw Exception("Not support.", ErrorCodes::NOT_IMPLEMENTED); } @@ -645,6 +780,11 @@ void PageWriter::reloadSettings(const PageStorageConfig & new_config) const storage_v3->reloadSettings(new_config); break; } + case PageStorageRunMode::UNI_PS: + { + // Uni PS will reload config in its gc thread + break; + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } @@ -668,6 +808,10 @@ bool PageWriter::gc(bool not_skip, const WriteLimiterPtr & write_limiter, const ok |= storage_v3->gc(not_skip, write_limiter, read_limiter); return ok; } + case PageStorageRunMode::UNI_PS: + { + return false; + } default: throw Exception(fmt::format("Unknown PageStorageRunMode {}", static_cast(run_mode)), ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 856397dca5a..30febef3ca1 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -51,20 +51,14 @@ class Context; class PageStorage; using PageStoragePtr = std::shared_ptr; class RegionPersister; +class UniversalPageStorage; +using UniversalPageStoragePtr = std::shared_ptr; namespace ErrorCodes { extern const int LOGICAL_ERROR; } // namespace ErrorCodes - -enum class PageStorageRunMode : UInt8 -{ - ONLY_V2 = 1, - ONLY_V3 = 2, - MIX_MODE = 3, -}; - /** * A storage system stored pages. Pages are serialized objects referenced by PageID. Store Page with the same PageID * will cover the old ones. @@ -234,10 +228,25 @@ class PageReader : private boost::noncopyable { public: /// Not snapshot read. 
- explicit PageReader(const PageStorageRunMode & run_mode_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, ReadLimiterPtr read_limiter_); + explicit PageReader( + const PageStorageRunMode & run_mode_, + StorageType tag_, + NamespaceId ns_id_, + PageStoragePtr storage_v2_, + PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, + ReadLimiterPtr read_limiter_); /// Snapshot read. - PageReader(const PageStorageRunMode & run_mode_, NamespaceId ns_id_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, PageStorage::SnapshotPtr snap_, ReadLimiterPtr read_limiter_); + PageReader( + const PageStorageRunMode & run_mode_, + StorageType tag_, + NamespaceId ns_id_, + PageStoragePtr storage_v2_, + PageStoragePtr storage_v3_, + UniversalPageStoragePtr uni_ps_, + PageStorage::SnapshotPtr snap_, + ReadLimiterPtr read_limiter_); ~PageReader(); @@ -269,14 +278,16 @@ using PageReaderPtr = std::shared_ptr; class PageWriter : private boost::noncopyable { public: - PageWriter(PageStorageRunMode run_mode_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_) + PageWriter(PageStorageRunMode run_mode_, StorageType tag_, PageStoragePtr storage_v2_, PageStoragePtr storage_v3_, UniversalPageStoragePtr uni_ps_) : run_mode(run_mode_) + , tag(tag_) , storage_v2(storage_v2_) , storage_v3(storage_v3_) + , uni_ps(uni_ps_) { } - void write(WriteBatch && write_batch, WriteLimiterPtr write_limiter) const; + void write(WriteBatchWrapper && write_batch, WriteLimiterPtr write_limiter) const; friend class RegionPersister; @@ -291,6 +302,8 @@ class PageWriter : private boost::noncopyable #endif void writeIntoMixMode(WriteBatch && write_batch, WriteLimiterPtr write_limiter) const; + void writeIntoUni(UniversalWriteBatch && write_batch, WriteLimiterPtr write_limiter) const; + // A wrap of getSettings only used for `RegionPersister::gc` PageStorageConfig getSettings() const; @@ -302,8 +315,10 @@ class PageWriter : private boost::noncopyable private: 
PageStorageRunMode run_mode; + StorageType tag; PageStoragePtr storage_v2; PageStoragePtr storage_v3; + UniversalPageStoragePtr uni_ps; }; using PageWriterPtr = std::shared_ptr; diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 1bb6d5e0dce..b9afebfc793 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -859,8 +859,7 @@ PageDirectory::PageDirectory(String storage_name, WALStorePtr && wal_, UI , wal(std::move(wal_)) , max_persisted_log_files(max_persisted_log_files_) , log(Logger::get(storage_name)) -{ -} +{} template PageDirectorySnapshotPtr PageDirectory::createSnapshot(const String & tracing_id) const @@ -1146,7 +1145,7 @@ typename PageDirectory::PageId PageDirectory::getNormalPageId(cons } template -UInt64 PageDirectory::getMaxId() const +UInt64 PageDirectory::getMaxIdAfterRestart() const { std::shared_lock read_lock(table_rw_mutex); return max_page_id; @@ -1171,7 +1170,6 @@ typename PageDirectory::PageIdSet PageDirectory::getAllPageIds() template typename PageDirectory::PageIdSet PageDirectory::getAllPageIdsWithPrefix(const String & prefix, const DB::PageStorageSnapshotPtr & snap_) { - UNUSED(snap_); if constexpr (std::is_same_v) { PageIdSet page_ids; @@ -1181,7 +1179,7 @@ typename PageDirectory::PageIdSet PageDirectory::getAllPageIdsWith iter != mvcc_table_directory.end(); ++iter) { - if (!iter->first.isPrefix(prefix)) + if (!iter->first.hasPrefix(prefix)) break; // Only return the page_id that is visible if (iter->second->isVisible(seq)) @@ -1341,8 +1339,6 @@ void PageDirectory::apply(PageEntriesEdit && edit, const WriteLimiterPtr for (const auto & r : edit.getRecords()) { // Protected in write_lock - max_page_id = std::max(max_page_id, Trait::PageIdTrait::getU64ID(r.page_id)); - auto [iter, created] = mvcc_table_directory.insert(std::make_pair(r.page_id, nullptr)); if (created) { @@ -1503,9 +1499,13 @@ PageDirectory::getEntriesByBlobIds(const 
std::vector & blob_i RUNTIME_CHECK(page_iter != mvcc_table_directory.end(), ref_id, ori_id, ver); } const auto & version_entries = page_iter->second; + // After storing all data in one PageStorage instance, we will run full gc + // with external pages. Skip rewriting if it is an external pages. + if (version_entries->isExternalPage()) + continue; // the latest entry with version.seq <= ref_id.create_ver.seq auto entry = version_entries->getLastEntry(ver.sequence); - RUNTIME_CHECK(entry.has_value(), ref_id, ori_id, ver); + RUNTIME_CHECK_MSG(entry.has_value(), "ref_id={} ori_id={} ver={} entries={}", ref_id, ori_id, ver, version_entries->toDebugString()); // If the being-ref entry lays on the full gc candidate blobfiles, then we // need to rewrite the ref-id to a normal page. if (blob_id_set.count(entry->file_id) > 0) @@ -1737,6 +1737,29 @@ typename PageDirectory::PageEntriesEdit PageDirectory::dumpSnapsho return edit; } +template +size_t PageDirectory::numPagesWithPrefix(const String & prefix) const +{ + if constexpr (std::is_same_v) + { + std::shared_lock read_lock(table_rw_mutex); + size_t num = 0; + for (auto iter = mvcc_table_directory.lower_bound(prefix); + iter != mvcc_table_directory.end(); + ++iter) + { + if (!iter->first.hasPrefix(prefix)) + break; + num++; + } + return num; + } + else + { + throw Exception("", ErrorCodes::NOT_IMPLEMENTED); + } +} + template class VersionedPageEntries; template class VersionedPageEntries; diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index 4543ecdc358..2bb70b0a5e3 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -163,6 +163,8 @@ class VersionedPageEntries , being_ref_count(1) {} + bool isExternalPage() const { return type == EditRecordType::VAR_EXTERNAL; } + [[nodiscard]] PageLock acquireLock() const { return std::lock_guard(m); @@ -337,7 +339,7 @@ class PageDirectory PageId getNormalPageId(const PageId & page_id, 
const DB::PageStorageSnapshotPtr & snap_, bool throw_on_not_exist) const; - UInt64 getMaxId() const; + UInt64 getMaxIdAfterRestart() const; PageIdSet getAllPageIds(); @@ -382,6 +384,8 @@ class PageDirectory std::shared_lock read_lock(table_rw_mutex); return mvcc_table_directory.size(); } + // Only used in test + size_t numPagesWithPrefix(const String & prefix) const; FileUsageStatistics getFileUsageStatistics() const { @@ -422,6 +426,10 @@ class PageDirectory } private: + // max page id after restart(just used for table storage). + // it may be for the whole instance or just for some specific prefix which is depending on the Trait passed. + // Keeping it up to date is costly but useless, so it is not updated after restarting. Do NOT rely on it + // except for specific situations UInt64 max_page_id; std::atomic sequence; diff --git a/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h b/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h index 9ee7405eec7..65ffa23f91f 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h +++ b/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h @@ -57,17 +57,11 @@ struct PageIdTrait } static inline PageIdU64 getU64ID(const PageId & page_id) { - if (page_id.size() >= sizeof(UInt64)) - return UniversalPageIdFormat::decodeUInt64(page_id.data() + page_id.size() - sizeof(UInt64)); - else - return INVALID_PAGE_U64_ID; + return UniversalPageIdFormat::getU64ID(page_id); } static inline Prefix getPrefix(const PageId & page_id) { - if (page_id.size() >= sizeof(UInt64)) - return page_id.substr(0, page_id.size() - sizeof(UInt64)).toStr(); - else - return ""; + return UniversalPageIdFormat::getFullPrefix(page_id); } static inline PageId getPageMapKey(const PageId & page_id) { diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 7ddd132ed8f..14eeba1eee4 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ 
b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -53,7 +53,7 @@ PageDirectoryFactory::createFromReader(const String & storage_name, WALSt // try to run GC again on some entries that are already marked as invalid in BlobStore. // It's no need to remove the expired entries in BlobStore, so skip filling removed_entries to improve performance. dir->gcInMemEntries(/*return_removed_entries=*/false); - LOG_INFO(DB::Logger::get(storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); + LOG_INFO(DB::Logger::get(storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxIdAfterRestart(), dir->sequence); if (blob_stats) { @@ -164,7 +164,21 @@ void PageDirectoryFactory::applyRecord( } } - dir->max_page_id = std::max(dir->max_page_id, Trait::PageIdTrait::getU64ID(r.page_id)); + if constexpr (std::is_same_v) + { + // We only need page id under specific prefix after restart. + // If you want to add other prefix here, make sure the page id allocation space is still enough after adding it. 
+ if (r.page_id.hasPrefix(UniversalPageIdFormat::toSubPrefix(StorageType::Data)) + || r.page_id.hasPrefix(UniversalPageIdFormat::toSubPrefix(StorageType::Log)) + || r.page_id.hasPrefix(UniversalPageIdFormat::toSubPrefix(StorageType::Meta))) + { + dir->max_page_id = std::max(dir->max_page_id, Trait::PageIdTrait::getU64ID(r.page_id)); + } + } + else + { + dir->max_page_id = std::max(dir->max_page_id, Trait::PageIdTrait::getU64ID(r.page_id)); + } const auto & version_list = iter->second; const auto & restored_version = r.version; diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index c671a30d145..0f7165c9072 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -76,7 +76,7 @@ void PageStorageImpl::restore() PageIdU64 PageStorageImpl::getMaxId() { - return page_directory->getMaxId(); + return page_directory->getMaxIdAfterRestart(); } void PageStorageImpl::drop() diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h b/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h index 40e7200f56c..28b23cc1611 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h @@ -18,21 +18,11 @@ #include #include #include -#include namespace DB { class UniversalPageId final { -public: - static inline UniversalPageId toFullPageId(const String & prefix, PageIdU64 page_id) - { - WriteBufferFromOwnString buff; - writeString(prefix, buff); - UniversalPageIdFormat::encodeUInt64(page_id, buff); - return buff.releaseStr(); - } - public: UniversalPageId() = default; @@ -73,7 +63,7 @@ class UniversalPageId final bool empty() const { return id.empty(); } UniversalPageId substr(size_t pos, size_t npos) const { return id.substr(pos, npos); } bool operator<(const UniversalPageId & rhs) const { return id < rhs.id; } - bool isPrefix(const String & str) const { return startsWith(id, str); } + bool 
hasPrefix(const String & str) const { return startsWith(id, str); } String toStr() const { return id; } const String & asStr() const { return id; } @@ -90,23 +80,3 @@ inline bool operator==(const String & lhs, const UniversalPageId & rhs) return lhs == rhs.id; } } // namespace DB - -template <> -struct fmt::formatter -{ - static constexpr auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) - { - const auto * it = ctx.begin(); - const auto * end = ctx.end(); - /// Only support {}. - if (it != end && *it != '}') - throw format_error("invalid format"); - return it; - } - - template - auto format(const DB::UniversalPageId & value, FormatContext & ctx) const -> decltype(ctx.out()) - { - return format_to(ctx.out(), "{}", Redact::keyToHexString(value.data(), value.size())); - } -}; \ No newline at end of file diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageIdFormat.h b/dbms/src/Storages/Page/V3/Universal/UniversalPageIdFormat.h index 87911593b71..7be4b2c8c1d 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalPageIdFormat.h +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageIdFormat.h @@ -16,12 +16,14 @@ #include #include +#include +#include namespace DB { // General UniversalPageId Format: Prefix + PageIdU64. // So normally the size of page id should be larger than 8 bytes(size of PageIdU64). 
-// If the size of page id is smaller than 8 bytes, it will be regraded as a whole.(Its Prefix is empty, while PageIdU64 is INVALID_PAGE_U64_ID) +// If the size of page id is smaller than 8 bytes, it will be regraded as a whole.(Its Prefix is itself, while PageIdU64 is INVALID_PAGE_U64_ID) // // Currently, if the PageIdU64 is 0(which is INVALID_PAGE_U64_ID), it may have some special meaning in some cases, // so please avoid it in the following case: @@ -43,8 +45,68 @@ namespace DB // Data // Prefix = [optional prefix] + "td" + NamespaceId +enum class StorageType +{ + Log = 1, + Data = 2, + Meta = 3, + KVStore = 4, +}; + struct UniversalPageIdFormat { +public: + static inline UniversalPageId toFullPageId(const String & prefix, PageIdU64 page_id) + { + WriteBufferFromOwnString buff; + writeString(prefix, buff); + UniversalPageIdFormat::encodeUInt64(page_id, buff); + return buff.releaseStr(); + } + + static inline String toSubPrefix(StorageType type) + { + switch (type) + { + case StorageType::Log: + return "tl"; + case StorageType::Data: + return "td"; + case StorageType::Meta: + return "tm"; + case StorageType::KVStore: + return "kvs"; + default: + throw Exception(fmt::format("Unknown storage type {}", static_cast(type)), ErrorCodes::LOGICAL_ERROR); + } + } + + static inline String toFullPrefix(StorageType type, NamespaceId ns_id) + { + WriteBufferFromOwnString buff; + writeString(toSubPrefix(type), buff); + if (type != StorageType::KVStore) + { + UniversalPageIdFormat::encodeUInt64(ns_id, buff); + } + return buff.releaseStr(); + } + + static inline PageIdU64 getU64ID(const UniversalPageId & page_id) + { + if (page_id.size() >= sizeof(UInt64)) + return decodeUInt64(page_id.data() + page_id.size() - sizeof(UInt64)); + else + return INVALID_PAGE_U64_ID; + } + + static inline String getFullPrefix(const UniversalPageId & page_id) + { + size_t prefix_length = (page_id.size() >= sizeof(UInt64)) ? 
(page_id.size() - sizeof(UInt64)) : page_id.size(); + return page_id.substr(0, prefix_length).toStr(); + } + +private: static inline void encodeUInt64(const UInt64 x, WriteBuffer & ss) { auto u = toBigEndian(x); @@ -58,3 +120,24 @@ struct UniversalPageIdFormat } }; } // namespace DB + +template <> +struct fmt::formatter +{ + static constexpr auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) + { + const auto * it = ctx.begin(); + const auto * end = ctx.end(); + /// Only support {}. + if (it != end && *it != '}') + throw format_error("invalid format"); + return it; + } + + template + auto format(const DB::UniversalPageId & value, FormatContext & ctx) const -> decltype(ctx.out()) + { + auto prefix = DB::UniversalPageIdFormat::getFullPrefix(value); + return format_to(ctx.out(), "{}.{}", Redact::keyToHexString(prefix.data(), prefix.size()), DB::UniversalPageIdFormat::getU64ID(value)); + } +}; diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.cpp b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.cpp index 9df23eb58b0..ff83eb8e760 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.cpp +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.cpp @@ -23,7 +23,7 @@ namespace DB { UniversalPageStoragePtr UniversalPageStorage::create( - String name, + const String & name, PSDiskDelegatorPtr delegator, const PageStorageConfig & config, const FileProviderPtr & file_provider) @@ -47,6 +47,11 @@ void UniversalPageStorage::restore() .create(storage_name, file_provider, delegator, PS::V3::WALConfig::from(config)); } +size_t UniversalPageStorage::getNumberOfPages(const String & prefix) const +{ + return page_directory->numPagesWithPrefix(prefix); +} + void UniversalPageStorage::write(UniversalWriteBatch && write_batch, const WriteLimiterPtr & write_limiter) const { if (unlikely(write_batch.empty())) @@ -182,9 +187,9 @@ DB::PageEntry UniversalPageStorage::getEntry(const UniversalPageId & page_id, Sn } } -PageIdU64 
UniversalPageStorage::getMaxId() const +PageIdU64 UniversalPageStorage::getMaxIdAfterRestart() const { - return page_directory->getMaxId(); + return page_directory->getMaxIdAfterRestart(); } bool UniversalPageStorage::gc(bool /*not_skip*/, const WriteLimiterPtr & write_limiter, const ReadLimiterPtr & read_limiter) diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.h b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.h index 336d4747a74..1708eb5eaf3 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.h +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorage.h @@ -24,8 +24,7 @@ #include #include #include -#include -#include +#include #include #include @@ -56,7 +55,7 @@ class UniversalPageStorage final public: static UniversalPageStoragePtr create( - String name, + const String & name, PSDiskDelegatorPtr delegator, const PageStorageConfig & config, const FileProviderPtr & file_provider); @@ -96,6 +95,8 @@ class UniversalPageStorage final return u; } + size_t getNumberOfPages(const String & prefix) const; + void write(UniversalWriteBatch && write_batch, const WriteLimiterPtr & write_limiter = nullptr) const; Page read(const UniversalPageId & page_id, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true) const; @@ -113,7 +114,7 @@ class UniversalPageStorage final DB::PageEntry getEntry(const UniversalPageId & page_id, SnapshotPtr snapshot); - PageIdU64 getMaxId() const; + PageIdU64 getMaxIdAfterRestart() const; // We may skip the GC to reduce useless reading by default. 
bool gc(bool not_skip = false, const WriteLimiterPtr & write_limiter = nullptr, const ReadLimiterPtr & read_limiter = nullptr); diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.cpp b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.cpp new file mode 100644 index 00000000000..2cc7a8af3d3 --- /dev/null +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.cpp @@ -0,0 +1,58 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +namespace DB +{ +UniversalPageStorageServicePtr UniversalPageStorageService::create( + Context & context, + const String & name, + PSDiskDelegatorPtr delegator, + const PageStorageConfig & config) +{ + auto service = UniversalPageStorageServicePtr(new UniversalPageStorageService(context)); + service->uni_page_storage = UniversalPageStorage::create(name, delegator, config, context.getFileProvider()); + service->uni_page_storage->restore(); + service->gc_handle = context.getBackgroundPool().addTask( + [service] { + return service->uni_page_storage->gc(); + }, + false, + /*interval_ms*/ 60 * 1000); + return service; +} + +bool UniversalPageStorageService::gc() +{ + Timepoint now = Clock::now(); + const std::chrono::seconds try_gc_period(30); + if (now < (last_try_gc_time.load() + try_gc_period)) + return false; + + last_try_gc_time = now; + // TODO: reload config + return this->uni_page_storage->gc(); +} + +UniversalPageStorageService::~UniversalPageStorageService() +{ + if (gc_handle) + { + global_context.getBackgroundPool().removeTask(gc_handle); + gc_handle = nullptr; + } +} +} // namespace DB diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.h b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.h new file mode 100644 index 00000000000..99e3d573365 --- /dev/null +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageStorageService.h @@ -0,0 +1,56 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB +{ +class UniversalPageStorageService; +using UniversalPageStorageServicePtr = std::shared_ptr; + +// This is wrapper class for UniversalPageStorage. +// It mainly manages background tasks like gc for UniversalPageStorage. +// It is like StoragePool for Page V2, and GlobalStoragePool for Page V3. +class UniversalPageStorageService final +{ +public: + static UniversalPageStorageServicePtr + create( + Context & context, + const String & name, + PSDiskDelegatorPtr delegator, + const PageStorageConfig & config); + + bool gc(); + UniversalPageStoragePtr getUniversalPageStorage() const { return uni_page_storage; } + ~UniversalPageStorageService(); + +private: + explicit UniversalPageStorageService(Context & global_context_) + : global_context(global_context_) + , uni_page_storage(nullptr) + { + } + +private: + Context & global_context; + UniversalPageStoragePtr uni_page_storage; + BackgroundProcessingPool::TaskHandle gc_handle; + + std::atomic last_try_gc_time = Clock::now(); +}; +} // namespace DB diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalWriteBatch.h b/dbms/src/Storages/Page/V3/Universal/UniversalWriteBatch.h index 14fe6b891dc..c804a38c7a5 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalWriteBatch.h +++ b/dbms/src/Storages/Page/V3/Universal/UniversalWriteBatch.h @@ -44,13 +44,31 @@ class UniversalWriteBatch : private boost::noncopyable using Writes = std::vector; public: - UniversalWriteBatch() = default; - - UniversalWriteBatch(UniversalWriteBatch && rhs) - : writes(std::move(rhs.writes)) - , total_data_size(rhs.total_data_size) + explicit UniversalWriteBatch(String prefix_ = "") + : prefix(std::move(prefix_)) {} + void putPage(PageIdU64 page_id, UInt64 tag, const ReadBufferPtr & read_buffer, PageSize size, const PageFieldSizes & data_sizes = {}) + { + 
putPage(UniversalPageIdFormat::toFullPageId(prefix, page_id), tag, read_buffer, size, data_sizes); + } + + void putExternal(PageIdU64 page_id, UInt64 tag) + { + putExternal(UniversalPageIdFormat::toFullPageId(prefix, page_id), tag); + } + + // Add RefPage{ref_id} -> Page{page_id} + void putRefPage(PageIdU64 ref_id, PageIdU64 page_id) + { + putRefPage(UniversalPageIdFormat::toFullPageId(prefix, ref_id), UniversalPageIdFormat::toFullPageId(prefix, page_id)); + } + + void delPage(PageIdU64 page_id) + { + delPage(UniversalPageIdFormat::toFullPageId(prefix, page_id)); + } + void putPage(const UniversalPageId & page_id, UInt64 tag, const ReadBufferPtr & read_buffer, PageSize size, const PageFieldSizes & data_sizes = {}) { // Convert from data_sizes to the offset of each field @@ -123,12 +141,6 @@ class UniversalWriteBatch : private boost::noncopyable return count; } - void swap(UniversalWriteBatch & o) - { - writes.swap(o.writes); - std::swap(o.total_data_size, total_data_size); - } - void merge(UniversalWriteBatch & rhs) { writes.reserve(writes.size() + rhs.writes.size()); @@ -183,7 +195,21 @@ class UniversalWriteBatch : private boost::noncopyable return fmt_buffer.toString(); } + UniversalWriteBatch(UniversalWriteBatch && rhs) + : prefix(std::move(rhs.prefix)) + , writes(std::move(rhs.writes)) + , total_data_size(rhs.total_data_size) + {} + + void swap(UniversalWriteBatch & o) + { + prefix.swap(o.prefix); + writes.swap(o.writes); + std::swap(o.total_data_size, total_data_size); + } + private: + String prefix; Writes writes; size_t total_data_size = 0; }; diff --git a/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage.cpp b/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage.cpp index b78591beab5..dfb69041ebb 100644 --- a/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage.cpp @@ -42,6 +42,11 @@ class UniPageStorageTest : public 
DB::base::TiFlashStorageTestBasic log = Logger::get("PageStorageTest"); } + void reload() + { + page_storage = reopenWithConfig(config); + } + std::shared_ptr reopenWithConfig(const PageStorageConfig & config_) { auto path = getTemporaryPath(); @@ -75,13 +80,13 @@ try { UniversalWriteBatch wb; - wb.putPage(UniversalPageId::toFullPageId(prefix, 0), tag, std::make_shared(c_buff, buf_sz), buf_sz); - wb.putPage(UniversalPageId::toFullPageId(prefix, 21), tag, std::make_shared(c_buff, buf_sz), buf_sz); - wb.putPage(UniversalPageId::toFullPageId(prefix, 200), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix, 0), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix, 21), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix, 200), tag, std::make_shared(c_buff, buf_sz), buf_sz); page_storage->write(std::move(wb)); } - DB::Page page0 = page_storage->read(UniversalPageId::toFullPageId(prefix, 0)); + DB::Page page0 = page_storage->read(UniversalPageIdFormat::toFullPageId(prefix, 0)); ASSERT_TRUE(page0.isValid()); ASSERT_EQ(page0.data.size(), buf_sz); ASSERT_EQ(page0.page_id, 0UL); @@ -89,7 +94,7 @@ try { EXPECT_EQ(*(page0.data.begin() + i), static_cast(i % 0xff)); } - DB::Page page1 = page_storage->read(UniversalPageId::toFullPageId(prefix, 21)); + DB::Page page1 = page_storage->read(UniversalPageIdFormat::toFullPageId(prefix, 21)); ASSERT_TRUE(page1.isValid()); ASSERT_EQ(page1.data.size(), buf_sz); ASSERT_EQ(page1.page_id, 21UL); @@ -97,7 +102,7 @@ try { EXPECT_EQ(*(page1.data.begin() + i), static_cast(i % 0xff)); } - DB::Page page2 = page_storage->read(UniversalPageId::toFullPageId(prefix, 500), nullptr, {}, false); + DB::Page page2 = page_storage->read(UniversalPageIdFormat::toFullPageId(prefix, 500), nullptr, {}, false); ASSERT_TRUE(!page2.isValid()); } CATCH @@ -115,7 +120,7 @@ TEST_F(UniPageStorageTest, Traverse) { 
c_buff[0] = 10; c_buff[1] = i; - wb.putPage(UniversalPageId::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); } page_storage->write(std::move(wb)); } @@ -126,7 +131,7 @@ TEST_F(UniPageStorageTest, Traverse) { c_buff[0] = 10; c_buff[1] = i; - wb.putPage(UniversalPageId::toFullPageId(prefix2, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix2, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); } page_storage->write(std::move(wb)); } @@ -134,7 +139,7 @@ TEST_F(UniPageStorageTest, Traverse) { size_t read_count = 0; auto checker = [&](const UniversalPageId & page_id, const DB::Page & page) { - ASSERT_TRUE(page_id.isPrefix(prefix1)); + ASSERT_TRUE(page_id.hasPrefix(prefix1)); ASSERT_TRUE(page.isValid()); read_count += 1; }; @@ -145,7 +150,7 @@ TEST_F(UniPageStorageTest, Traverse) { size_t read_count = 0; auto checker = [&](const UniversalPageId & page_id, const DB::Page & page) { - ASSERT_TRUE(page_id.isPrefix(prefix2)); + ASSERT_TRUE(page_id.hasPrefix(prefix2)); ASSERT_TRUE(page.isValid()); read_count += 1; }; @@ -156,7 +161,7 @@ TEST_F(UniPageStorageTest, Traverse) { size_t read_count = 0; auto checker = [&](const UniversalPageId & page_id, const DB::Page & page) { - ASSERT_TRUE(page_id.isPrefix(prefix3)); + ASSERT_TRUE(page_id.hasPrefix(prefix3)); ASSERT_TRUE(page.isValid()); read_count += 1; }; @@ -176,7 +181,7 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) { c_buff[0] = 10; c_buff[1] = i; - wb.putPage(UniversalPageId::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); } page_storage->write(std::move(wb)); } @@ -189,7 +194,7 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) { c_buff[0] = 10; c_buff[1] = i; - 
wb.putPage(UniversalPageId::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); } page_storage->write(std::move(wb)); } @@ -197,7 +202,7 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) { size_t read_count = 0; auto checker = [&](const UniversalPageId & page_id, const DB::Page & page) { - ASSERT_TRUE(page_id.isPrefix(prefix1)); + ASSERT_TRUE(page_id.hasPrefix(prefix1)); ASSERT_TRUE(page.isValid()); read_count += 1; }; @@ -212,7 +217,7 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) { c_buff[0] = 10; c_buff[1] = i; - wb.delPage(UniversalPageId::toFullPageId(prefix1, i)); + wb.delPage(UniversalPageIdFormat::toFullPageId(prefix1, i)); } page_storage->write(std::move(wb)); } @@ -220,7 +225,7 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) { size_t read_count = 0; auto checker = [&](const UniversalPageId & page_id, const DB::Page & page) { - ASSERT_TRUE(page_id.isPrefix(prefix1)); + ASSERT_TRUE(page_id.hasPrefix(prefix1)); ASSERT_TRUE(page.isValid()); read_count += 1; }; @@ -229,18 +234,48 @@ TEST_F(UniPageStorageTest, TraverseWithSnap) } } +TEST_F(UniPageStorageTest, GetMaxIdWithPrefix) +{ + const String prefix1 = UniversalPageIdFormat::toSubPrefix(StorageType::Log); + const String prefix2 = UniversalPageIdFormat::toSubPrefix(StorageType::Data); + const String prefix3 = UniversalPageIdFormat::toSubPrefix(StorageType::Data); + const String prefix4 = "aaa"; + const String prefix5 = "bbb"; + const UInt64 tag = 0; + const size_t write_count = 100; + { + UniversalWriteBatch wb; + for (size_t i = 0; i < write_count; i++) + { + c_buff[0] = 10; + c_buff[1] = i; + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix1, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix2, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix3, i), tag, 
std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix4, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + wb.putPage(UniversalPageIdFormat::toFullPageId(prefix5, i), tag, std::make_shared(c_buff, buf_sz), buf_sz); + } + page_storage->write(std::move(wb)); + } + + ASSERT_EQ(page_storage->getMaxIdAfterRestart(), 0); + + reload(); + ASSERT_EQ(page_storage->getMaxIdAfterRestart(), write_count - 1); +} + TEST(UniPageStorageIdTest, UniversalPageId) { { - auto u_id = UniversalPageId::toFullPageId("aaa", 100); - ASSERT_EQ(DB::PS::V3::universal::PageIdTrait::getU64ID(u_id), 100); - ASSERT_EQ(DB::PS::V3::universal::PageIdTrait::getPrefix(u_id), "aaa"); + auto u_id = UniversalPageIdFormat::toFullPageId("aaa", 100); + ASSERT_EQ(UniversalPageIdFormat::getU64ID(u_id), 100); + ASSERT_EQ(UniversalPageIdFormat::getFullPrefix(u_id), "aaa"); } { auto u_id = "z"; - ASSERT_EQ(DB::PS::V3::universal::PageIdTrait::getU64ID(u_id), 0); - ASSERT_EQ(DB::PS::V3::universal::PageIdTrait::getPrefix(u_id), ""); + ASSERT_EQ(UniversalPageIdFormat::getU64ID(u_id), 0); + ASSERT_EQ(UniversalPageIdFormat::getFullPrefix(u_id), "z"); } } } // namespace PS::universal::tests diff --git a/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage_storage_pool.cpp b/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage_storage_pool.cpp new file mode 100644 index 00000000000..2d0ae04a6ea --- /dev/null +++ b/dbms/src/Storages/Page/V3/Universal/tests/gtest_universal_page_storage_storage_pool.cpp @@ -0,0 +1,367 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +using namespace tests; +namespace PS::V3::tests +{ +class UniPageStorageStoragePoolTest : public DB::base::TiFlashStorageTestBasic +{ +public: + void SetUp() override + { + auto & global_context = DB::tests::TiFlashTestEnv::getGlobalContext(); + old_run_mode = global_context.getPageStorageRunMode(); + global_context.setPageStorageRunMode(PageStorageRunMode::UNI_PS); + TiFlashStorageTestBasic::SetUp(); + + reload(); + } + + void reload() + { + auto & global_context = DB::tests::TiFlashTestEnv::getGlobalContext(); + auto path = TiFlashTestEnv::getTemporaryPath("UniPageStorageStoragePoolTest"); + std::vector caps = {}; + Strings paths = {path}; + PathCapacityMetricsPtr cap_metrics = std::make_shared(0, paths, caps, Strings{}, caps); + storage_path_pool_v2 = std::make_unique(Strings{path}, Strings{path}, "test", "t1", true, cap_metrics, global_context.getFileProvider()); + path_pool = std::make_unique( + Strings{path}, + Strings{path}, + Strings{}, + cap_metrics, + global_context.getFileProvider()); + storage_pool = std::make_unique( + global_context, + TEST_NAMESPACE_ID, + *storage_path_pool_v2, + "test.t1"); + } + + void TearDown() override + { + auto & global_context = DB::tests::TiFlashTestEnv::getGlobalContext(); + global_context.setPageStorageRunMode(old_run_mode); + } + +private: + PageStorageRunMode old_run_mode; + std::unique_ptr storage_path_pool_v2; + std::unique_ptr path_pool; + +protected: + std::unique_ptr 
storage_pool; +}; + +// Predicate-formatter behind ASSERT_PAGE_EQ / EXPECT_PAGE_EQ: succeeds only when the page's data size, its page id and its data bytes all equal the expected buffer size, id and buffer. +inline ::testing::AssertionResult getPageCompare( + const char * /*buff_cmp_expr*/, + const char * buf_size_expr, + const char * /*page_cmp_expr*/, + const char * page_id_expr, + char * buff_cmp, + const size_t buf_size, + const Page & page_cmp, + const PageIdU64 & page_id) +{ + if (page_cmp.data.size() != buf_size) + { + return testing::internal::EqFailure( + DB::toString(buf_size).c_str(), + DB::toString(page_cmp.data.size()).c_str(), + buf_size_expr, + "page.data.size()", + false); + } + + if (page_cmp.page_id != page_id) + { + return testing::internal::EqFailure( + DB::toString(page_id).c_str(), + DB::toString(page_cmp.page_id).c_str(), + page_id_expr, + "page.page_id", + false); + } + + if (memcmp(page_cmp.data.begin(), buff_cmp, buf_size) != 0) // memcmp, not strncmp: page bytes are binary and may contain '\0', where strncmp would stop comparing + { + return ::testing::AssertionFailure( // + ::testing::Message( + "Page data not match the buffer")); + } + + return ::testing::AssertionSuccess(); +} + +#define ASSERT_PAGE_EQ(buff_cmp, buf_size, page_cmp, page_id) \ + ASSERT_PRED_FORMAT4(getPageCompare, buff_cmp, buf_size, page_cmp, page_id) +#define EXPECT_PAGE_EQ(buff_cmp, buf_size, page_cmp, page_id) \ + EXPECT_PRED_FORMAT4(getPageCompare, buff_cmp, buf_size, page_cmp, page_id) + +TEST_F(UniPageStorageStoragePoolTest, WriteRead) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(2, tag, buff, buf_sz); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + { + const auto & page1 = storage_pool->logReader()->read(1); + const auto & page2 = storage_pool->logReader()->read(2); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + 
ASSERT_PAGE_EQ(c_buff, buf_sz, page2, 2); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + const size_t buf_sz2 = 2048; + char c_buff2[buf_sz2] = {0}; + + ReadBufferPtr buff2 = std::make_shared(c_buff2, sizeof(c_buff2)); + batch.putPage(3, tag, buff2, buf_sz2); + storage_pool->logWriter()->write(std::move(batch), nullptr); + + const auto & page3 = storage_pool->logReader()->read(3); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.delPage(3); + storage_pool->logWriter()->write(std::move(batch), nullptr); + ASSERT_ANY_THROW(storage_pool->logReader()->read(3)); + } +} +CATCH + +TEST_F(UniPageStorageStoragePoolTest, ReadWithSnapshot) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(2, tag, buff, buf_sz, {20, 120, 400, 200, 15, 75, 170, 24}); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + const size_t buf_sz2 = 2048; + char c_buff2[buf_sz2] = {0}; + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + ReadBufferPtr buff2 = std::make_shared(c_buff2, sizeof(c_buff2)); + batch.putPage(3, tag, buff2, buf_sz2); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + auto snapshot = storage_pool->logReader()->getSnapshot("ReadWithSnapshotTest"); + { + auto page_reader_with_snap = storage_pool->newLogReader(nullptr, snapshot); + + 
const auto & page1 = page_reader_with_snap.read(1); + const auto & page2 = page_reader_with_snap.read(2); + const auto & page3 = page_reader_with_snap.read(3); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page2, 2); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.delPage(3); + ReadBufferPtr buff2 = std::make_shared(c_buff2, sizeof(c_buff2)); + batch.putPage(4, tag, buff2, buf_sz2); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + { + auto page_reader_with_snap = storage_pool->newLogReader(nullptr, snapshot); + const auto & page3 = page_reader_with_snap.read(3); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + ASSERT_THROW(page_reader_with_snap.read(4), DB::Exception); + } +} +CATCH + + +TEST_F(UniPageStorageStoragePoolTest, PutExt) +try +{ + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.putExternal(1, 0); + batch.putExternal(2, 0); + batch.putExternal(3, 0); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + auto uni_ps = storage_pool->global_context.getWriteNodePageStorage(); + auto external_ids = uni_ps->page_directory->getAliveExternalIds(UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)); + ASSERT_EQ((*external_ids).size(), 3); + ASSERT_TRUE((*external_ids).find(1) != (*external_ids).end()); + ASSERT_TRUE((*external_ids).find(2) != (*external_ids).end()); + ASSERT_TRUE((*external_ids).find(3) != (*external_ids).end()); +} +CATCH + +TEST_F(UniPageStorageStoragePoolTest, Ref) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); 
+ batch.putPage(7, 0, buff, buf_sz); + buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(8, 0, buff, buf_sz, {20, 120, 400, 200, 15, 75, 170, 24}); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + { + const auto & entry = storage_pool->logReader()->getPageEntry(8); + ASSERT_EQ(entry.field_offsets.size(), 8); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.putRefPage(9, 7); + storage_pool->logWriter()->write(std::move(batch), nullptr); + ASSERT_EQ(storage_pool->logReader()->getNormalPageId(9), 7); + const auto & page = storage_pool->logReader()->read(9); + ASSERT_PAGE_EQ(c_buff, buf_sz, page, 9); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.delPage(7); + storage_pool->logWriter()->write(std::move(batch), nullptr); + ASSERT_EQ(storage_pool->logReader()->getNormalPageId(9), 7); + const auto & page = storage_pool->logReader()->read(9); + ASSERT_PAGE_EQ(c_buff, buf_sz, page, 9); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.putRefPage(10, 8); + + ASSERT_NO_THROW(storage_pool->logWriter()->write(std::move(batch), nullptr)); + ASSERT_EQ(storage_pool->logReader()->getNormalPageId(10), 8); + + std::vector read_fields; + read_fields.emplace_back(std::pair(10, {0, 1, 2, 6})); + + PageMapU64 page_maps = storage_pool->logReader()->read(read_fields); + ASSERT_EQ(page_maps.size(), 1); + ASSERT_EQ(page_maps.at(10).page_id, 10); + ASSERT_EQ(page_maps.at(10).field_offsets.size(), 4); + ASSERT_EQ(page_maps.at(10).data.size(), 710); + + auto field_offset = page_maps.at(10).field_offsets; + auto it = field_offset.begin(); + ASSERT_EQ(it->offset, 0); + ++it; + ASSERT_EQ(it->offset, 20); + ++it; + ASSERT_EQ(it->offset, 140); + ++it; + 
ASSERT_EQ(it->offset, 540); + } +} +CATCH + +TEST_F(UniPageStorageStoragePoolTest, RefWithSnapshot) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(7, 0, buff, buf_sz); + buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(8, 0, buff, buf_sz, {20, 120, 400, 200, 15, 75, 170, 24}); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + { + const auto & entry = storage_pool->logReader()->getPageEntry(8); + ASSERT_EQ(entry.field_offsets.size(), 8); + } + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.putRefPage(9, 7); + storage_pool->logWriter()->write(std::move(batch), nullptr); + ASSERT_EQ(storage_pool->logReader()->getNormalPageId(9), 7); + const auto & page = storage_pool->logReader()->read(9); + ASSERT_PAGE_EQ(c_buff, buf_sz, page, 9); + } + + auto snapshot = storage_pool->logReader()->getSnapshot("ReadWithSnapshotTest"); + + { + WriteBatchWrapper batch{PageStorageRunMode::UNI_PS, UniversalPageIdFormat::toFullPrefix(StorageType::Log, TEST_NAMESPACE_ID)}; + batch.delPage(7); + batch.delPage(9); + storage_pool->logWriter()->write(std::move(batch), nullptr); + } + + { + auto page_reader_with_snap = storage_pool->newLogReader(nullptr, snapshot); + ASSERT_EQ(page_reader_with_snap.getNormalPageId(9), 7); + const auto & page = page_reader_with_snap.read(9); + ASSERT_PAGE_EQ(c_buff, buf_sz, page, 9); + } +} +CATCH + +} // namespace PS::V3::tests +} // namespace DB diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 77e7cf00619..35b979dd0a6 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -43,7 +43,7 @@ std::pair 
WALStore::create( String storage_name, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, - WALConfig config) + const WALConfig & config) { auto reader = WALStoreReader::create(storage_name, provider, @@ -66,7 +66,7 @@ WALStore::WALStore( const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, Format::LogNumberType last_log_num_, - WALConfig config_) + const WALConfig & config_) : storage_name(std::move(storage_name_)) , delegator(delegator_) , provider(provider_) diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 7cd0a930d71..c11e868acd0 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -57,13 +57,12 @@ class WALStore String storage_name_, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, - WALConfig config); + const WALConfig & config); WALStoreReaderPtr createReaderForFiles(const String & identifier, const LogFilenameSet & log_filenames, const ReadLimiterPtr & read_limiter); void apply(String && serialized_edit, const WriteLimiterPtr & write_limiter = nullptr); - FileUsageStatistics getFileUsageStatistics() const { FileUsageStatistics usage; @@ -107,7 +106,7 @@ class WALStore const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, Format::LogNumberType last_log_num_, - WALConfig config); + const WALConfig & config); std::tuple, LogFilename> createLogWriter( diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 94ca37a5cbf..d38a35f2a83 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -1600,8 +1600,7 @@ try ASSERT_EQ(std::get<0>(ids[0]), buildV3Id(TEST_NAMESPACE_ID, 11)); ASSERT_EQ(std::get<0>(ids[1]), buildV3Id(TEST_NAMESPACE_ID, 12)); - // upsert 11->entry2 - // upsert 12->entry3 + // Mock full gc happen in BlobStore. 
upsert 11->entry2, upsert 12->entry3 PageEntriesEdit edit; edit.upsertPage(std::get<0>(ids[0]), std::get<1>(ids[0]), entry2); edit.upsertPage(std::get<0>(ids[1]), std::get<1>(ids[1]), entry3); @@ -1646,6 +1645,104 @@ try } CATCH +TEST_F(PageDirectoryGCTest, RewriteRefedIdToExternalPage) +try +{ + // 10->entry1, 11->10, 12->10 + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + { + u128::PageEntriesEdit edit; + edit.put(buildV3Id(TEST_NAMESPACE_ID, 10), entry1); + dir->apply(std::move(edit)); + } + { + u128::PageEntriesEdit edit; + edit.ref(buildV3Id(TEST_NAMESPACE_ID, 11), buildV3Id(TEST_NAMESPACE_ID, 10)); + dir->apply(std::move(edit)); + } + { + u128::PageEntriesEdit edit; + edit.ref(buildV3Id(TEST_NAMESPACE_ID, 12), buildV3Id(TEST_NAMESPACE_ID, 10)); + edit.del(buildV3Id(TEST_NAMESPACE_ID, 10)); + dir->apply(std::move(edit)); + } + // 50->ext_id, 51->50 + { + u128::PageEntriesEdit edit; + edit.putExternal(buildV3Id(TEST_NAMESPACE_ID, 50)); + edit.ref(buildV3Id(TEST_NAMESPACE_ID, 51), buildV3Id(TEST_NAMESPACE_ID, 50)); + dir->apply(std::move(edit)); + } + // entry1 should not be removed + { + auto outdated_entries = dir->gcInMemEntries(); + EXPECT_TRUE(outdated_entries.empty()); + } + + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123 + 1024, .checksum = 0x4567}; + { + // this will return ref page 11 and 12 that need to be rewritten + // to new blob file. + auto full_gc_entries = dir->getEntriesByBlobIds({1}); + ASSERT_EQ(full_gc_entries.first.size(), 1); + auto ids = full_gc_entries.first.at(1); + ASSERT_EQ(ids.size(), 2); + ASSERT_EQ(std::get<0>(ids[0]), buildV3Id(TEST_NAMESPACE_ID, 11)); + ASSERT_EQ(std::get<0>(ids[1]), buildV3Id(TEST_NAMESPACE_ID, 12)); + + // Mock full gc happen in BlobStore. 
upsert 11->entry2, upsert 12->entry3 + u128::PageEntriesEdit edit; + edit.upsertPage(std::get<0>(ids[0]), std::get<1>(ids[0]), entry2); + edit.upsertPage(std::get<0>(ids[1]), std::get<1>(ids[1]), entry3); + // this will rewrite ref page 11, 12 to normal page + dir->gcApply(std::move(edit)); + } + + // page 10 get removed + auto removed_entries = dir->gcInMemEntries(); + ASSERT_EQ(removed_entries.size(), 1); + EXPECT_SAME_ENTRY(removed_entries[0], entry1); + + { + auto snap = dir->createSnapshot(); + EXPECT_ENTRY_EQ(entry2, dir, 11, snap); + EXPECT_ENTRY_EQ(entry3, dir, 12, snap); + EXPECT_ENTRY_NOT_EXIST(dir, 10, snap); + auto external_ids = dir->getAliveExternalIds(TEST_NAMESPACE_ID); + ASSERT_GT(external_ids->count(50), 0); + } + + // del 11->entry2 + { + u128::PageEntriesEdit edit; + edit.del(buildV3Id(TEST_NAMESPACE_ID, 11)); + dir->apply(std::move(edit)); + // entry2 get removed + auto outdated_entries = dir->gcInMemEntries(); + ASSERT_EQ(1, outdated_entries.size()); + EXPECT_SAME_ENTRY(entry2, outdated_entries[0]); + } + // del 12->entry3 + { + u128::PageEntriesEdit edit; + edit.del(buildV3Id(TEST_NAMESPACE_ID, 12)); + dir->apply(std::move(edit)); + // entry3 get removed + auto outdated_entries = dir->gcInMemEntries(); + ASSERT_EQ(1, outdated_entries.size()); + EXPECT_SAME_ENTRY(entry3, outdated_entries[0]); + } + + auto external_ids = dir->getAliveExternalIds(TEST_NAMESPACE_ID); + ASSERT_GT(external_ids->count(50), 0); + auto all_page_ids = dir->getAllPageIds(); + ASSERT_EQ(all_page_ids.size(), 2); + ASSERT_GT(all_page_ids.count(buildV3Id(TEST_NAMESPACE_ID, 50)), 0); + ASSERT_GT(all_page_ids.count(buildV3Id(TEST_NAMESPACE_ID, 51)), 0); +} +CATCH + TEST_F(PageDirectoryGCTest, RewriteRefedIdWithConcurrentDelete) try { diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 6beff83a0c2..054e6cd783d 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ 
b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1495,7 +1495,7 @@ try batch.putExternal(1999, 0); batch.putExternal(2000, 0); page_storage->write(std::move(batch)); - ASSERT_EQ(page_storage->getMaxId(), 2000); + // ASSERT_EQ(page_storage->getMaxId(), 2000); // max id will not be updated, ignore this check } { @@ -1523,7 +1523,7 @@ try batch.putExternal(20000, 0); batch.putExternal(20001, 0); page_storage->write(std::move(batch)); - ASSERT_EQ(page_storage->getMaxId(), 20001); + // ASSERT_EQ(page_storage->getMaxId(), 20001); // max id will not be updated, ignore this check } { diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp index 6ea5297a600..1652a2f6af0 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp @@ -67,7 +67,7 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic storage_pool_v2 = std::make_unique(global_context, TEST_NAMESPACE_ID, *storage_path_pool_v2, "test.t1"); global_context.setPageStorageRunMode(PageStorageRunMode::MIX_MODE); - storage_pool_mix = std::make_unique(global_context, + storage_pool_mix = std::make_unique(*db_context, TEST_NAMESPACE_ID, *storage_path_pool_v2, "test.t1"); @@ -683,7 +683,7 @@ try { ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); - ASSERT_EQ(storage_pool_mix->newLogPageId(), 4); + // ASSERT_EQ(storage_pool_mix->newLogPageId(), 4); // max id for v3 will not be updated, ignore this check } } CATCH @@ -728,7 +728,7 @@ try { ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::ONLY_V3); - ASSERT_EQ(storage_pool_mix->newLogPageId(), 2); + // ASSERT_EQ(storage_pool_mix->newLogPageId(), 2); // max id for v3 will not be updated, ignore this check } } CATCH @@ -758,7 +758,7 @@ try { ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); ASSERT_EQ(page_reader_mix->getNormalPageId(2), 
1); - ASSERT_EQ(storage_pool_mix->newLogPageId(), 3); + // ASSERT_EQ(storage_pool_mix->newLogPageId(), 3); // max id for v3 will not be updated, ignore this check } auto snapshot_before_del = page_reader_mix->getSnapshot("ReadWithSnapshotBeforeDelOrigin"); diff --git a/dbms/src/Storages/Page/WriteBatch.h b/dbms/src/Storages/Page/WriteBatch.h index 209322ef68b..f66a2214502 100644 --- a/dbms/src/Storages/Page/WriteBatch.h +++ b/dbms/src/Storages/Page/WriteBatch.h @@ -82,11 +82,6 @@ class WriteBatch : private boost::noncopyable explicit WriteBatch(NamespaceId namespace_id_) : namespace_id(namespace_id_) {} - WriteBatch(WriteBatch && rhs) - : writes(std::move(rhs.writes)) - , sequence(rhs.sequence) - , namespace_id(rhs.namespace_id) - {} void putPage(PageIdU64 page_id, UInt64 tag, const ReadBufferPtr & read_buffer, PageSize size, const PageFieldSizes & data_sizes = {}) { @@ -186,13 +181,6 @@ class WriteBatch : private boost::noncopyable return count; } - void swap(WriteBatch & o) - { - writes.swap(o.writes); - std::swap(o.total_data_size, total_data_size); - std::swap(o.sequence, sequence); - } - void copyWrite(const Write write) { writes.emplace_back(write); @@ -273,6 +261,21 @@ class WriteBatch : private boost::noncopyable return fmt_buffer.toString(); } + WriteBatch(WriteBatch && rhs) + : writes(std::move(rhs.writes)) + , sequence(rhs.sequence) + , namespace_id(rhs.namespace_id) + , total_data_size(rhs.total_data_size) + {} + + void swap(WriteBatch & o) + { + writes.swap(o.writes); + std::swap(o.sequence, sequence); + std::swap(o.namespace_id, namespace_id); + std::swap(o.total_data_size, total_data_size); + } + private: Writes writes; SequenceID sequence = 0; diff --git a/dbms/src/Storages/Page/WriteBatchWrapper.h b/dbms/src/Storages/Page/WriteBatchWrapper.h new file mode 100644 index 00000000000..07c66dc868b --- /dev/null +++ b/dbms/src/Storages/Page/WriteBatchWrapper.h @@ -0,0 +1,161 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes + +enum class PageStorageRunMode : UInt8 +{ + ONLY_V2 = 1, + ONLY_V3 = 2, + MIX_MODE = 3, + UNI_PS = 4, +}; + +// Holds exactly one of a UniversalWriteBatch (when constructed with PageStorageRunMode::UNI_PS) or a WriteBatch (any other mode); the other pointer stays null. +class WriteBatchWrapper : private boost::noncopyable +{ +public: + explicit WriteBatchWrapper(PageStorageRunMode mode, StorageType tag, NamespaceId ns_id) + { + switch (mode) + { + case PageStorageRunMode::UNI_PS: + uwb = std::make_unique(UniversalPageIdFormat::toFullPrefix(tag, ns_id)); + wb = nullptr; + break; + default: + wb = std::make_unique(ns_id); + uwb = nullptr; + break; + } + } + + explicit WriteBatchWrapper(PageStorageRunMode mode, std::variant && prefix) + { + switch (mode) + { + case PageStorageRunMode::UNI_PS: + uwb = std::make_unique(std::move(std::get(prefix))); + wb = nullptr; + break; + default: + wb = std::make_unique(std::get(prefix)); + uwb = nullptr; + break; + } + } + +#ifdef DBMS_PUBLIC_GTEST + WriteBatchWrapper(WriteBatch && wb_) + : wb(std::make_unique(std::move(wb_))) + , uwb(nullptr) + {} +#endif + + WriteBatchWrapper(WriteBatchWrapper && rhs) noexcept // only moves the two unique_ptr members, which cannot throw + : wb(std::move(rhs.wb)) + , uwb(std::move(rhs.uwb)) + {} + + void putPage(PageIdU64 page_id, UInt64 tag, const ReadBufferPtr & read_buffer, PageSize size, const 
PageFieldSizes & data_sizes = {}) + { + if (wb) + wb->putPage(page_id, tag, read_buffer, size, data_sizes); + else + uwb->putPage(page_id, tag, read_buffer, size, data_sizes); + } + + void putExternal(PageIdU64 page_id, UInt64 tag) + { + if (wb) + wb->putExternal(page_id, tag); + else + uwb->putExternal(page_id, tag); + } + + // Add RefPage{ref_id} -> Page{page_id} + void putRefPage(PageIdU64 ref_id, PageIdU64 page_id) + { + if (wb) + wb->putRefPage(ref_id, page_id); + else + uwb->putRefPage(ref_id, page_id); + } + + void delPage(PageIdU64 page_id) + { + if (wb) + wb->delPage(page_id); + else + uwb->delPage(page_id); + } + + bool empty() const + { + if (wb) + return wb->empty(); + else + return uwb->empty(); + } + + void clear() + { + if (wb) + wb->clear(); + else + uwb->clear(); + } + + const WriteBatch & getWriteBatch() const + { + return *wb; + } + + const UniversalWriteBatch & getUniversalWriteBatch() const + { + return *uwb; + } + + WriteBatch && releaseWriteBatch() + { + return std::move(*wb); + } + + UniversalWriteBatch && releaseUniversalWriteBatch() + { + return std::move(*uwb); + } + +private: + std::unique_ptr wb; + std::unique_ptr uwb; +}; +} // namespace DB diff --git a/dbms/src/Storages/PathPool.cpp b/dbms/src/Storages/PathPool.cpp index 71c9a7b7a8b..510c802d4dc 100644 --- a/dbms/src/Storages/PathPool.cpp +++ b/dbms/src/Storages/PathPool.cpp @@ -52,6 +52,12 @@ inline String getNormalizedPath(const String & s) return removeTrailingSlash(Poco::Path{s}.toString()); } +const String PathPool::log_path_prefix = "log"; +const String PathPool::data_path_prefix = "data"; +const String PathPool::meta_path_prefix = "meta"; +const String PathPool::kvstore_path_prefix = "kvstore"; +const String PathPool::write_uni_path_prefix = "write"; + // Constructor to be used during initialization PathPool::PathPool( const Strings & main_data_paths_, @@ -72,7 +78,7 @@ PathPool::PathPool( for (const auto & s : latest_data_paths) { // Get a normalized path without trailing '/' 
- auto p = getNormalizedPath(s + "/kvstore"); + auto p = getNormalizedPath(s + "/" + PathPool::kvstore_path_prefix); kvstore_paths.emplace_back(std::move(p)); } } diff --git a/dbms/src/Storages/PathPool.h b/dbms/src/Storages/PathPool.h index 2e13b2d53f0..8bb4572c411 100644 --- a/dbms/src/Storages/PathPool.h +++ b/dbms/src/Storages/PathPool.h @@ -77,6 +77,12 @@ class PathPool const Strings & listGlobalPagePaths() const { return global_page_paths; } + static const String log_path_prefix; + static const String data_path_prefix; + static const String meta_path_prefix; + static const String kvstore_path_prefix; + static const String write_uni_path_prefix; + public: // A thread safe wrapper for storing a map of class PageFilePathMap diff --git a/dbms/src/Storages/Transaction/RegionPersister.cpp b/dbms/src/Storages/Transaction/RegionPersister.cpp index 74ec3709fbf..ac544a6fd65 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.cpp +++ b/dbms/src/Storages/Transaction/RegionPersister.cpp @@ -27,6 +27,7 @@ #include #include +#include #include namespace CurrentMetrics @@ -43,9 +44,9 @@ extern const int LOGICAL_ERROR; void RegionPersister::drop(RegionID region_id, const RegionTaskLock &) { - DB::WriteBatch wb_v2{ns_id}; - wb_v2.delPage(region_id); - page_writer->write(std::move(wb_v2), global_context.getWriteLimiter()); + DB::WriteBatchWrapper wb{run_mode, getWriteBatchPrefix()}; + wb.delPage(region_id); + page_writer->write(std::move(wb), global_context.getWriteLimiter()); } void RegionPersister::computeRegionWriteBuffer(const Region & region, RegionCacheWriteElement & region_write_buffer) @@ -104,13 +105,14 @@ void RegionPersister::doPersist(RegionCacheWriteElement & region_write_buffer, c } auto read_buf = buffer.tryGetReadBuffer(); - DB::WriteBatch wb{ns_id}; + DB::WriteBatchWrapper wb{run_mode, getWriteBatchPrefix()}; wb.putPage(region_id, applied_index, read_buf, region_size); page_writer->write(std::move(wb), global_context.getWriteLimiter()); } 
RegionPersister::RegionPersister(Context & global_context_, const RegionManager & region_manager_) : global_context(global_context_) + , run_mode(global_context.getPageStorageRunMode()) , region_manager(region_manager_) , log(Logger::get()) {} @@ -147,7 +149,7 @@ void RegionPersister::forceTransformKVStoreV2toV3() // Will rewrite into V3 one by one. // The region data is big. It is not a good idea to combine pages. - page_writer->write(std::move(write_batch_transform), nullptr); + page_writer->writeIntoV3(std::move(write_batch_transform), nullptr); // Record del page_id write_batch_del_v2.delPage(page.page_id); @@ -164,10 +166,8 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH { auto delegator = path_pool.getPSDiskDelegatorRaft(); auto provider = global_context.getFileProvider(); - const auto global_run_mode = global_context.getPageStorageRunMode(); - auto run_mode = global_run_mode; - switch (global_run_mode) + switch (run_mode) { case PageStorageRunMode::ONLY_V2: { @@ -177,10 +177,6 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH { LOG_WARNING(log, "Detect V1 format data, and we will read it using V2 format code."); } - - mergeConfigFromSettings(global_context.getSettingsRef(), config); - config.num_write_slots = 4; // extend write slots to 4 at least - auto page_storage_v2 = std::make_shared( "RegionPersister", delegator, @@ -188,8 +184,8 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH provider, global_context.getPSBackgroundPool()); page_storage_v2->restore(); - page_writer = std::make_shared(global_run_mode, page_storage_v2, /*storage_v3_*/ nullptr); - page_reader = std::make_shared(global_run_mode, ns_id, page_storage_v2, /*storage_v3_*/ nullptr, /*readlimiter*/ global_context.getReadLimiter()); + page_writer = std::make_shared(run_mode, StorageType::KVStore, page_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr); + page_reader = 
std::make_shared(run_mode, StorageType::KVStore, ns_id, page_storage_v2, /*storage_v3_*/ nullptr, /*uni_ps_*/ nullptr, /*readlimiter*/ global_context.getReadLimiter()); break; } case PageStorageRunMode::ONLY_V3: @@ -198,12 +194,12 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH auto page_storage_v3 = std::make_shared( // "RegionPersister", - path_pool.getPSDiskDelegatorGlobalMulti("kvstore"), + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::kvstore_path_prefix), config, provider); page_storage_v3->restore(); - page_writer = std::make_shared(global_run_mode, /*storage_v2_*/ nullptr, page_storage_v3); - page_reader = std::make_shared(global_run_mode, ns_id, /*storage_v2_*/ nullptr, page_storage_v3, global_context.getReadLimiter()); + page_writer = std::make_shared(run_mode, StorageType::KVStore, /*storage_v2_*/ nullptr, page_storage_v3, /*uni_ps_*/ nullptr); + page_reader = std::make_shared(run_mode, StorageType::KVStore, ns_id, /*storage_v2_*/ nullptr, page_storage_v3, /*uni_ps_*/ nullptr, global_context.getReadLimiter()); break; } case PageStorageRunMode::MIX_MODE: @@ -220,7 +216,7 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH // Because V2 will delete all invalid(unrecognized) file when it restore auto page_storage_v3 = std::make_shared( // "RegionPersister", - path_pool.getPSDiskDelegatorGlobalMulti("kvstore"), + path_pool.getPSDiskDelegatorGlobalMulti(PathPool::kvstore_path_prefix), config, provider); @@ -229,8 +225,8 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH if (const auto & kvstore_remain_pages = page_storage_v2->getNumberOfPages(); kvstore_remain_pages != 0) { - page_writer = std::make_shared(global_run_mode, page_storage_v2, page_storage_v3); - page_reader = std::make_shared(global_run_mode, ns_id, page_storage_v2, page_storage_v3, global_context.getReadLimiter()); + page_writer = std::make_shared(run_mode, StorageType::KVStore, 
page_storage_v2, page_storage_v3, /*uni_ps_*/ nullptr); + page_reader = std::make_shared(run_mode, StorageType::KVStore, ns_id, page_storage_v2, page_storage_v3, /*uni_ps_*/ nullptr, global_context.getReadLimiter()); LOG_INFO(log, "Current kvstore transform to V3 begin [pages_before_transform={}]", kvstore_remain_pages); forceTransformKVStoreV2toV3(); @@ -257,16 +253,23 @@ RegionMap RegionPersister::restore(PathPool & path_pool, const TiFlashRaftProxyH page_storage_v2 = nullptr; // Must use PageStorageRunMode::ONLY_V3 here. - page_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, page_storage_v3); - page_reader = std::make_shared(PageStorageRunMode::ONLY_V3, ns_id, /*storage_v2_*/ nullptr, page_storage_v3, global_context.getReadLimiter()); + page_writer = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::KVStore, /*storage_v2_*/ nullptr, page_storage_v3, /*uni_ps_*/ nullptr); + page_reader = std::make_shared(PageStorageRunMode::ONLY_V3, StorageType::KVStore, ns_id, /*storage_v2_*/ nullptr, page_storage_v3, /*uni_ps_*/ nullptr, global_context.getReadLimiter()); run_mode = PageStorageRunMode::ONLY_V3; break; } + case PageStorageRunMode::UNI_PS: + { + auto uni_ps = global_context.getWriteNodePageStorage(); + page_writer = std::make_shared(run_mode, StorageType::KVStore, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps); + page_reader = std::make_shared(run_mode, StorageType::KVStore, ns_id, /*storage_v2_*/ nullptr, /*storage_v3_*/ nullptr, uni_ps, global_context.getReadLimiter()); + break; + } } CurrentMetrics::set(CurrentMetrics::RegionPersisterRunMode, static_cast(run_mode)); - LOG_INFO(log, "RegionPersister running. Current Run Mode is {}", static_cast(run_mode)); + LOG_INFO(log, "RegionPersister running. 
Current Run Mode is {}", magic_enum::enum_name(run_mode)); } RegionMap regions; diff --git a/dbms/src/Storages/Transaction/RegionPersister.h b/dbms/src/Storages/Transaction/RegionPersister.h index 2a6a6ec4eef..bde2c2c6362 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.h +++ b/dbms/src/Storages/Transaction/RegionPersister.h @@ -34,13 +34,6 @@ class RegionTaskLock; struct RegionManager; struct TiFlashRaftProxyHelper; -namespace PS -{ -namespace V1 -{ -class PageStorage; -} -} // namespace PS class PageStorage; class RegionPersister final : private boost::noncopyable @@ -61,20 +54,27 @@ class RegionPersister final : private boost::noncopyable FileUsageStatistics getFileUsageStatistics() const; -#ifndef DBMS_PUBLIC_GTEST private: -#endif - void forceTransformKVStoreV2toV3(); void doPersist(RegionCacheWriteElement & region_write_buffer, const RegionTaskLock & lock, const Region & region); void doPersist(const Region & region, const RegionTaskLock * lock); -#ifndef DBMS_PUBLIC_GTEST private: -#endif + inline std::variant getWriteBatchPrefix() const + { + switch (run_mode) + { + case PageStorageRunMode::UNI_PS: + return UniversalPageIdFormat::toSubPrefix(StorageType::KVStore); + default: + return ns_id; + } + } +private: Context & global_context; + PageStorageRunMode run_mode; PageWriterPtr page_writer; PageReaderPtr page_reader; diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 2e1f7bb18cf..8ae3be454ac 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -971,7 +971,7 @@ TEST_F(RegionKVStoreTest, KVStore) { auto region_id = 19; auto region = makeRegion(region_id, RecordKVFormat::genKey(1, 50), RecordKVFormat::genKey(1, 60)); - auto region_id_str = std::to_string(19); + auto region_id_str = std::to_string(region_id); auto & mmp = MockSSTReader::getMockSSTData(); MockSSTReader::getMockSSTData().clear(); 
MockSSTReader::Data default_kv_list; @@ -995,7 +995,7 @@ TEST_F(RegionKVStoreTest, KVStore) 8, 5, ctx.getTMTContext()); - ASSERT_EQ(kvs.getRegion(19)->checkIndex(8), true); + ASSERT_EQ(kvs.getRegion(region_id)->checkIndex(8), true); try { kvs.handleApplySnapshot( @@ -1009,7 +1009,7 @@ TEST_F(RegionKVStoreTest, KVStore) } catch (Exception & e) { - ASSERT_EQ(e.message(), "[region 19] already has newer apply-index 8 than 6, should not happen"); + ASSERT_EQ(e.message(), fmt::format("[region {}] already has newer apply-index 8 than 6, should not happen", region_id)); } } @@ -1091,7 +1091,7 @@ TEST_F(RegionKVStoreTest, KVStore) { auto region_id = 19; - auto region_id_str = std::to_string(19); + auto region_id_str = std::to_string(region_id); auto & mmp = MockSSTReader::getMockSSTData(); MockSSTReader::getMockSSTData().clear(); MockSSTReader::Data default_kv_list; @@ -1119,7 +1119,7 @@ TEST_F(RegionKVStoreTest, KVStore) 100, 1, ctx.getTMTContext()); - ASSERT_EQ(kvs.getRegion(19)->checkIndex(100), true); + ASSERT_EQ(kvs.getRegion(region_id)->checkIndex(100), true); } } diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp index 5d62778a1f4..936c7ea48ef 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp @@ -196,13 +196,16 @@ try } CATCH - -class RegionPersisterTest : public ::testing::Test +class RegionPersisterTest + : public ::testing::Test + , public testing::WithParamInterface { public: RegionPersisterTest() : dir_path(TiFlashTestEnv::getTemporaryPath("/region_persister_test")) { + test_run_mode = GetParam(); + old_run_mode = test_run_mode; } static void SetUpTestCase() {} @@ -210,11 +213,12 @@ class RegionPersisterTest : public ::testing::Test void SetUp() override { TiFlashTestEnv::tryRemovePath(dir_path); + auto & global_ctx = DB::tests::TiFlashTestEnv::getGlobalContext(); + old_run_mode = 
global_ctx.getPageStorageRunMode(); + global_ctx.setPageStorageRunMode(test_run_mode); - auto & global_ctx = TiFlashTestEnv::getGlobalContext(); auto path_capacity = global_ctx.getPathCapacity(); auto provider = global_ctx.getFileProvider(); - Strings main_data_paths{dir_path}; mocked_path_pool = std::make_unique( main_data_paths, @@ -222,15 +226,30 @@ class RegionPersisterTest : public ::testing::Test /*kvstore_paths=*/Strings{}, path_capacity, provider); + global_ctx.initializeWriteNodePageStorageIfNeed(*mocked_path_pool); + } + + void reload() + { + auto & global_ctx = DB::tests::TiFlashTestEnv::getGlobalContext(); + global_ctx.initializeWriteNodePageStorageIfNeed(*mocked_path_pool); + } + + void TearDown() override + { + auto & global_ctx = TiFlashTestEnv::getGlobalContext(); + global_ctx.setPageStorageRunMode(old_run_mode); } protected: + PageStorageRunMode test_run_mode; String dir_path; + PageStorageRunMode old_run_mode; std::unique_ptr mocked_path_pool; }; -TEST_F(RegionPersisterTest, persister) +TEST_P(RegionPersisterTest, persister) try { RegionManager region_manager; @@ -265,13 +284,26 @@ try { // Truncate the last byte of the meta to mock that the last region persist is not completed - auto meta_path = dir_path + "/page/kvstore/wal/log_1_0"; // First page + String meta_path; + switch (test_run_mode) + { + case PageStorageRunMode::ONLY_V3: + meta_path = dir_path + "/page/kvstore/wal/log_1_0"; // First page + break; + case PageStorageRunMode::UNI_PS: + meta_path = dir_path + "/page/write/wal/log_1_0"; // First page + break; + default: + throw Exception("", ErrorCodes::NOT_IMPLEMENTED); + } Poco::File meta_file(meta_path); size_t size = meta_file.getSize(); int ret = ::truncate(meta_path.c_str(), size - 1); // Remove last one byte ASSERT_EQ(ret, 0); } + reload(); + RegionMap new_regions; { RegionPersister persister(ctx, region_manager); @@ -299,5 +331,9 @@ try } CATCH +INSTANTIATE_TEST_CASE_P( + TestMode, + RegionPersisterTest, + 
testing::Values(PageStorageRunMode::ONLY_V3, PageStorageRunMode::UNI_PS)); } // namespace tests } // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/kvstore_helper.h b/dbms/src/Storages/Transaction/tests/kvstore_helper.h index 1d040feae65..f5504fdb1fd 100644 --- a/dbms/src/Storages/Transaction/tests/kvstore_helper.h +++ b/dbms/src/Storages/Transaction/tests/kvstore_helper.h @@ -79,7 +79,6 @@ class RegionKVStoreTest : public ::testing::Test { // clean data and create path pool instance path_pool = createCleanPathPool(test_path); - reloadKVSFromDisk(); proxy_instance = std::make_unique(); @@ -102,6 +101,7 @@ class RegionKVStoreTest : public ::testing::Test { kvstore.reset(); auto & global_ctx = TiFlashTestEnv::getGlobalContext(); + global_ctx.initializeWriteNodePageStorageIfNeed(*path_pool); kvstore = std::make_unique(global_ctx); // only recreate kvstore and restore data from disk, don't recreate proxy instance kvstore->restore(*path_pool, proxy_helper.get()); @@ -163,4 +163,4 @@ class RegionKVStoreTest : public ::testing::Test std::unique_ptr proxy_helper; }; } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/TestUtils/TiFlashTestEnv.cpp b/dbms/src/TestUtils/TiFlashTestEnv.cpp index 80000fde2c1..df9dc344f2c 100644 --- a/dbms/src/TestUtils/TiFlashTestEnv.cpp +++ b/dbms/src/TestUtils/TiFlashTestEnv.cpp @@ -138,6 +138,7 @@ void TiFlashTestEnv::addGlobalContext(Strings testdata_path, PageStorageRunMode global_context->setPageStorageRunMode(ps_run_mode); global_context->initializeGlobalStoragePoolIfNeed(global_context->getPathPool()); + global_context->initializeWriteNodePageStorageIfNeed(global_context->getPathPool()); LOG_INFO(Logger::get(), "Storage mode : {}", static_cast(global_context->getPageStorageRunMode())); TiFlashRaftConfig raft_config; @@ -172,6 +173,7 @@ Context TiFlashTestEnv::getContext(const DB::Settings & settings, Strings testda auto paths = getPathPool(testdata_path); 
context.setPathPool(paths.first, paths.second, Strings{}, context.getPathCapacity(), context.getFileProvider()); global_contexts[0]->initializeGlobalStoragePoolIfNeed(context.getPathPool()); + global_contexts[0]->initializeWriteNodePageStorageIfNeed(context.getPathPool()); context.getSettingsRef() = settings; return context; } diff --git a/dbms/src/TestUtils/gtests_dbms_main.cpp b/dbms/src/TestUtils/gtests_dbms_main.cpp index 3bcf1659eb2..4146a8ec48b 100644 --- a/dbms/src/TestUtils/gtests_dbms_main.cpp +++ b/dbms/src/TestUtils/gtests_dbms_main.cpp @@ -59,7 +59,9 @@ int main(int argc, char ** argv) install_fault_signal_handlers({SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGTERM}); DB::tests::TiFlashTestEnv::setupLogger(); - DB::tests::TiFlashTestEnv::initializeGlobalContext(); + auto run_mode = DB::PageStorageRunMode::ONLY_V3; + DB::tests::TiFlashTestEnv::initializeGlobalContext(/*testdata_path*/ {}, run_mode); + DB::ServerInfo server_info; // `DMFileReaderPool` should be constructed before and destructed after `SegmentReaderPoolManager`. DB::DM::DMFileReaderPool::instance();