From 39cb73338ba9f8ffde783f0e694313cc73e57c35 Mon Sep 17 00:00:00 2001 From: liaoxin Date: Mon, 3 Jul 2023 18:35:05 +0800 Subject: [PATCH] [enhancement](merge-on-write) split delete bitmap from tablet meta when publish --- be/src/olap/data_dir.cpp | 18 +++++ be/src/olap/olap_meta.cpp | 23 ++++++ be/src/olap/olap_meta.h | 7 +- be/src/olap/storage_engine.cpp | 31 ++++++++ be/src/olap/storage_engine.h | 2 + be/src/olap/tablet.cpp | 4 + be/src/olap/tablet_meta_manager.cpp | 95 +++++++++++++++++++++++ be/src/olap/tablet_meta_manager.h | 20 +++++ be/src/olap/txn_manager.cpp | 6 +- be/test/olap/tablet_meta_manager_test.cpp | 73 +++++++++++++++++ 10 files changed, 276 insertions(+), 3 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 970138cd0e1941..196c8be1c821dd 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -539,6 +540,23 @@ Status DataDir::load() { } } + auto load_delete_bitmap_func = [this](int64_t tablet_id, RowsetId rowset_id, int64_t segment_id, + int64_t version, const string& val) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); + if (!tablet) { + return true; + } + const std::vector& all_rowsets = tablet->tablet_meta()->all_rs_metas(); + for (auto& rowset_meta : all_rowsets) { + if (rowset_meta->rowset_id() == rowset_id) { + tablet->tablet_meta()->delete_bitmap().delete_bitmap[{ + rowset_id, segment_id, version}] = roaring::Roaring::read(val.data()); + } + } + return true; + }; + TabletMetaManager::traverse_delete_bitmap(_meta, load_delete_bitmap_func); + // At startup, we only count these invalid rowset, but do not actually delete it. // The actual delete operation is in StorageEngine::_clean_unused_rowset_metas, // which is cleaned up uniformly by the background cleanup thread. diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp index 4df89c04dc31f2..79c0441747968d 100644 --- a/be/src/olap/olap_meta.cpp +++ b/be/src/olap/olap_meta.cpp @@ -216,6 +216,29 @@ Status OlapMeta::put(const int column_family_index, const std::vectormeta_write_request_total->increment(1); + + rocksdb::Status s; + { + int64_t duration_ns = 0; + Defer defer([&] { + DorisMetrics::instance()->meta_write_request_duration_us->increment(duration_ns / 1000); + }); + SCOPED_RAW_TIMER(&duration_ns); + + WriteOptions write_options; + write_options.sync = config::sync_tablet_meta; + s = _db->Write(write_options, batch); + } + + if (!s.ok()) { + LOG(WARNING) << "rocks db put batch failed, reason:" << s.ToString(); + return Status::Error(); + } + return Status::OK(); +} + Status OlapMeta::remove(const int column_family_index, const std::string& key) { DorisMetrics::instance()->meta_write_request_total->increment(1); auto& handle = _handles[column_family_index]; diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h index 0b5e40045aee15..174f2d065f4ca2 100644 --- a/be/src/olap/olap_meta.h +++ b/be/src/olap/olap_meta.h @@ -27,6 +27,7 @@ namespace rocksdb { class ColumnFamilyHandle; class DB; +class WriteBatch; } // namespace rocksdb namespace doris { @@ -41,7 +42,6 @@ class OlapMeta final { : key(key_arg), value(value_arg) {} }; -public: OlapMeta(const std::string& root_path); ~OlapMeta(); @@ -53,6 +53,7 @@ class OlapMeta final { Status put(const int column_family_index, const std::string& key, const std::string& value); Status put(const int column_family_index, const std::vector& entries); + Status put(rocksdb::WriteBatch* batch); Status remove(const int column_family_index, const std::string& key); Status remove(const int column_family_index, const std::vector& keys); @@ -62,6 +63,10 @@ class OlapMeta final { std::string get_root_path() const { return _root_path; } + rocksdb::ColumnFamilyHandle* get_handle(const int column_family_index) { + return _handles[column_family_index].get(); + } + private: std::string _root_path; // keep order of _db && _handles, we need destroy _handles before _db diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 60b0b023c6609f..61750057323326 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "agent/task_worker_pool.h" @@ -64,6 +65,7 @@ #include "olap/single_replica_compaction.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/task/engine_task.h" #include "olap/txn_manager.h" #include "runtime/memory/mem_tracker.h" @@ -696,6 +698,9 @@ Status StorageEngine::start_trash_sweep(double* usage, bool ignore_guard) { // clean unused rowset metas in OlapMeta _clean_unused_rowset_metas(); + // cleand unused delete bitmap for deleted tablet + _clean_unused_delete_bitmap(); + // clean unused rowsets in remote storage backends for (auto data_dir : get_stores()) { data_dir->perform_remote_rowset_gc(); @@ -771,6 +776,32 @@ void StorageEngine::_clean_unused_rowset_metas() { } } +void StorageEngine::_clean_unused_delete_bitmap() { + std::unordered_set removed_tablets; + auto clean_delete_bitmap_func = [this, &removed_tablets](int64_t tablet_id, RowsetId rowset_id, + int64_t segment_id, int64_t version, + const std::string& val) -> bool { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + if (removed_tablets.insert(tablet_id).second) { + LOG(INFO) << "clean ununsed delete bitmap for deleted tablet, tablet_id: " + << tablet_id; + } + } + return true; + }; + auto data_dirs = get_stores(); + for (auto data_dir : data_dirs) { + TabletMetaManager::traverse_delete_bitmap(data_dir->get_meta(), clean_delete_bitmap_func); + for (auto id : removed_tablets) { + TabletMetaManager::remove_delete_bitmap_by_tablet_id(data_dir, id); + } + LOG(INFO) << "removed invalid delete bitmap from dir: " << data_dir->path() + << ", deleted tablets size: " << removed_tablets.size(); + removed_tablets.clear(); + } +} + void StorageEngine::gc_binlogs(const std::unordered_map& gc_tablet_infos) { for (auto [tablet_id, version] : gc_tablet_infos) { LOG(INFO) << fmt::format("start to gc binlogs for tablet_id: {}, version: {}", tablet_id, diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 011310156513cf..d6215586eda9df 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -247,6 +247,8 @@ class StorageEngine { void _clean_unused_rowset_metas(); + void _clean_unused_delete_bitmap(); + Status _do_sweep(const std::string& scan_root, const time_t& local_tm_now, const int32_t expire); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index a47728941dff8c..c29f9b6e211040 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -101,6 +101,7 @@ #include "olap/storage_policy.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/tablet_schema.h" #include "olap/txn_manager.h" #include "olap/types.h" @@ -1514,6 +1515,9 @@ bool Tablet::do_tablet_meta_checkpoint() { rs_meta->set_remove_from_rowset_meta(); } + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, tablet_id(), + max_version_unlocked().second); + _newly_created_rowset_num = 0; _last_checkpoint_time = UnixMillis(); return true; diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp index d3bc41fa193b2f..1cb7203e90b7ce 100644 --- a/be/src/olap/tablet_meta_manager.cpp +++ b/be/src/olap/tablet_meta_manager.cpp @@ -18,22 +18,29 @@ #include "olap/tablet_meta_manager.h" #include +#include #include +#include #include +#include #include +#include #include #include #include #include #include "common/logging.h" +#include "gutil/endian.h" #include "json2pb/json_to_pb.h" #include "json2pb/pb_to_json.h" #include "olap/data_dir.h" #include "olap/olap_define.h" #include "olap/olap_meta.h" +#include "olap/tablet_meta.h" #include "olap/utils.h" +#include "util/coding.h" namespace rocksdb { class Iterator; @@ -214,4 +221,92 @@ Status TabletMetaManager::traverse_pending_publish( return status; } +std::string TabletMetaManager::encode_delete_bitmap_key(TTabletId tablet_id, int64_t version, + const RowsetId& rowset_id, + int64_t segment_id) { + std::string key; + key.reserve(56); + key.append(DELETE_BITMAP); + put_fixed64_le(&key, BigEndian::FromHost64(tablet_id)); + put_fixed64_le(&key, BigEndian::FromHost64(version)); + put_fixed32_le(&key, BigEndian::FromHost32(rowset_id.version)); + put_fixed64_le(&key, BigEndian::FromHost64(rowset_id.hi)); + put_fixed64_le(&key, BigEndian::FromHost64(rowset_id.mi)); + put_fixed64_le(&key, BigEndian::FromHost64(rowset_id.lo)); + put_fixed64_le(&key, BigEndian::FromHost64(segment_id)); + return key; +} + +void TabletMetaManager::decode_delete_bitmap_key(const string& enc_key, TTabletId* tablet_id, + int64_t* version, RowsetId* rowset_id, + int64_t* segment_id) { + DCHECK_EQ(enc_key.size(), 56); + *tablet_id = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 4)); + *version = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 12)); + rowset_id->version = BigEndian::ToHost32(UNALIGNED_LOAD32(enc_key.data() + 20)); + rowset_id->hi = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 24)); + rowset_id->mi = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 32)); + rowset_id->lo = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 40)); + *segment_id = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 48)); +} + +Status TabletMetaManager::save_delete_bitmap(DataDir* store, TTabletId tablet_id, + DeleteBitmapPtr delete_bimap, int64_t version) { + if (delete_bimap->delete_bitmap.empty()) { + return Status::OK(); + } + OlapMeta* meta = store->get_meta(); + rocksdb::WriteBatch batch; + rocksdb::ColumnFamilyHandle* cf = meta->get_handle(META_COLUMN_FAMILY_INDEX); + for (auto& [id, bitmap] : delete_bimap->delete_bitmap) { + auto& rowset_id = std::get<0>(id); + int64_t segment_id = std::get<1>(id); + std::string key = encode_delete_bitmap_key(tablet_id, version, rowset_id, segment_id); + std::string value(bitmap.getSizeInBytes(), '\0'); + bitmap.write(value.data()); + batch.Put(cf, key, value); + } + return meta->put(&batch); +} + +Status TabletMetaManager::traverse_delete_bitmap( + OlapMeta* meta, + std::function const& func) { + auto traverse_header_func = [&func](const std::string& key, const std::string& value) -> bool { + TTabletId tablet_id; + int64_t version; + RowsetId rowset_id; + int64_t segment_id; + decode_delete_bitmap_key(key, &tablet_id, &version, &rowset_id, &segment_id); + VLOG_NOTICE << "traverse delete bitmap, key: |" << tablet_id << "|" << rowset_id << "|" + << segment_id << "|" << version; + return func(tablet_id, rowset_id, segment_id, version, value); + }; + Status status = meta->iterate(META_COLUMN_FAMILY_INDEX, DELETE_BITMAP, traverse_header_func); + return status; +} + +Status TabletMetaManager::remove_old_version_delete_bitmap(DataDir* store, TTabletId tablet_id, + int64_t version) { + OlapMeta* meta = store->get_meta(); + rocksdb::WriteBatch batch; + rocksdb::ColumnFamilyHandle* cf = meta->get_handle(META_COLUMN_FAMILY_INDEX); + auto lower_key = encode_delete_bitmap_key(tablet_id, 0, RowsetId(), 0); + auto upper_key = encode_delete_bitmap_key(tablet_id, version + 1, RowsetId(), 0); + batch.DeleteRange(cf, lower_key, upper_key); + LOG(INFO) << "remove delete bitmap, tablet_id: " << tablet_id << " version: " << version; + return meta->put(&batch); +} + +Status TabletMetaManager::remove_delete_bitmap_by_tablet_id(DataDir* store, TTabletId tablet_id) { + OlapMeta* meta = store->get_meta(); + rocksdb::WriteBatch batch; + rocksdb::ColumnFamilyHandle* cf = meta->get_handle(META_COLUMN_FAMILY_INDEX); + auto lower_key = encode_delete_bitmap_key(tablet_id, 0, RowsetId(), 0); + auto upper_key = encode_delete_bitmap_key(tablet_id, INT64_MAX, RowsetId(), 0); + batch.DeleteRange(cf, lower_key, upper_key); + LOG(INFO) << "remove delete bitmap by tablet_id, tablet_id: " << tablet_id; + return meta->put(&batch); +} + } // namespace doris diff --git a/be/src/olap/tablet_meta_manager.h b/be/src/olap/tablet_meta_manager.h index 6ba1d76757426b..17b89ff30504b7 100644 --- a/be/src/olap/tablet_meta_manager.h +++ b/be/src/olap/tablet_meta_manager.h @@ -36,6 +36,8 @@ const std::string HEADER_PREFIX = "tabletmeta_"; const std::string PENDING_PUBLISH_INFO = "ppi_"; +const std::string DELETE_BITMAP = "dlb_"; + // Helper Class for managing tablet headers of one root path. class TabletMetaManager { public: @@ -69,6 +71,24 @@ class TabletMetaManager { static Status traverse_pending_publish( OlapMeta* meta, std::function const& func); + + static Status save_delete_bitmap(DataDir* store, TTabletId tablet_id, + DeleteBitmapPtr delete_bimap, int64_t version); + + static Status traverse_delete_bitmap(OlapMeta* meta, + std::function const& func); + + static std::string encode_delete_bitmap_key(TTabletId tablet_id, int64_t version, + const RowsetId& rowset_id, int64_t segment_id); + + static void decode_delete_bitmap_key(const string& enc_key, TTabletId* tablet_id, + int64_t* version, RowsetId* rowset_id, + int64_t* segment_id); + static Status remove_old_version_delete_bitmap(DataDir* store, TTabletId tablet_id, + int64_t version); + + static Status remove_delete_bitmap_by_tablet_id(DataDir* store, TTabletId tablet_id); }; } // namespace doris diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index ebcb2873893610..a0f704da6df8a9 100644 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -42,6 +42,7 @@ #include "olap/storage_engine.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/task/engine_publish_version_task.h" #include "util/time.h" @@ -391,8 +392,9 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, } stats->partial_update_write_segment_us = MonotonicMicros() - t3; int64_t t4 = MonotonicMicros(); - std::shared_lock rlock(tablet->get_header_lock()); - tablet->save_meta(); + RETURN_IF_ERROR(TabletMetaManager::save_delete_bitmap( + tablet->data_dir(), tablet->tablet_id(), tablet_txn_info.delete_bitmap, + version.second)); stats->save_meta_time_us = MonotonicMicros() - t4; } diff --git a/be/test/olap/tablet_meta_manager_test.cpp b/be/test/olap/tablet_meta_manager_test.cpp index 6bd9c5db7dd083..5eba39e001bb21 100644 --- a/be/test/olap/tablet_meta_manager_test.cpp +++ b/be/test/olap/tablet_meta_manager_test.cpp @@ -20,16 +20,19 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include "gtest/gtest_pred_impl.h" #include "olap/data_dir.h" +#include "olap/tablet_meta.h" using std::string; @@ -108,4 +111,74 @@ TEST_F(TabletMetaManagerTest, TestLoad) { // EXPECT_EQ(_json_header, json_meta_read); } +TEST_F(TabletMetaManagerTest, TestDeleteBimapEncode) { + TTabletId tablet_id = 1234; + int64_t version = 456; + RowsetId rowset_id; + rowset_id.init(2, 777, 888, 999); + int64_t segment_id = 5; + std::string key = + TabletMetaManager::encode_delete_bitmap_key(tablet_id, version, rowset_id, segment_id); + + TTabletId de_tablet_id; + int64_t de_version; + RowsetId de_rowset_id; + int64_t de_segment_id; + TabletMetaManager::decode_delete_bitmap_key(key, &de_tablet_id, &de_version, &de_rowset_id, + &de_segment_id); + EXPECT_EQ(tablet_id, de_tablet_id); + EXPECT_EQ(version, de_version); + EXPECT_EQ(rowset_id, de_rowset_id); + EXPECT_EQ(segment_id, de_segment_id); +} + +TEST_F(TabletMetaManagerTest, TestSaveDeleteBimap) { + int64_t test_tablet_id = 10086; + std::shared_ptr dbmp = std::make_shared(test_tablet_id); + auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) { + for (int64_t rst = 0; rst < max_rst_id; ++rst) { + for (uint32_t seg = 0; seg < max_seg_id; ++seg) { + for (uint32_t row = 0; row < max_row; ++row) { + dbmp->add({RowsetId {2, 0, 1, rst}, seg, 0}, row); + } + } + } + }; + int64_t max_rst_id = 5; + int64_t max_seg_id = 5; + int64_t max_version = 300; + gen1(max_rst_id, max_seg_id, 10); + for (int64_t ver = 0; ver < max_version; ++ver) { + TabletMetaManager::save_delete_bitmap(_data_dir, test_tablet_id, dbmp, ver); + } + size_t num_keys = 0; + auto load_delete_bitmap_func = [&](int64_t tablet_id, RowsetId rowset_id, int64_t segment_id, + int64_t version, const string& val) { + EXPECT_EQ(tablet_id, test_tablet_id); + auto iter = dbmp->delete_bitmap.find({rowset_id, segment_id, 0}); + EXPECT_NE(iter, dbmp->delete_bitmap.end()); + auto bitmap = roaring::Roaring::read(val.data()); + EXPECT_EQ(bitmap.cardinality(), 10); + ++num_keys; + return true; + }; + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_rst_id * max_seg_id * max_version); + + num_keys = 0; + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, test_tablet_id, 100); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_rst_id * max_seg_id * (max_version - 101)); + + num_keys = 0; + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, test_tablet_id, 200); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_rst_id * max_seg_id * (max_version - 201)); + + num_keys = 0; + TabletMetaManager::remove_delete_bitmap_by_tablet_id(_data_dir, test_tablet_id); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, 0); +} + } // namespace doris