From b9d3fbf4ffe96b7c305407a1b21f41f677f53b62 Mon Sep 17 00:00:00 2001
From: Hooper
Date: Tue, 10 Dec 2024 11:24:10 +0800
Subject: [PATCH 1/2] Clear PG for moved-out member

When a member is moved out of a PG, clean up the PG and reclaim its
resources:

1. Mark the PG state as destroyed.
2. Destroy the PG's shards and their shard superblocks.
3. Reset all of the PG's chunks.
4. Remove the PG index table.
5. Destroy the PG superblock.
6. Return the PG's chunks to the dev_heap.

Additionally, enhance the restart test scenarios.
---
 conanfile.py                                  |  2 +-
 .../homestore_backend/heap_chunk_selector.cpp | 48 +++++++++++
 .../homestore_backend/heap_chunk_selector.h   | 14 +++
 src/lib/homestore_backend/hs_homeobject.hpp   | 48 +++++++++++
 src/lib/homestore_backend/hs_pg_manager.cpp   | 85 ++++++++++++++++++-
 .../homestore_backend/hs_shard_manager.cpp    | 20 +++++
 .../replication_state_machine.cpp             | 10 ++-
 .../tests/homeobj_fixture.hpp                 | 20 ++++-
 .../homestore_backend/tests/hs_blob_tests.cpp |  3 +
 .../homestore_backend/tests/hs_pg_tests.cpp   | 11 +--
 .../tests/hs_repl_test_helper.hpp             | 25 +++---
 .../tests/hs_shard_tests.cpp                  |  3 +
 .../tests/test_heap_chunk_selector.cpp        | 14 ++-
 .../tests/test_homestore_backend_dynamic.cpp  | 43 ++++++++++
 14 files changed, 318 insertions(+), 28 deletions(-)

diff --git a/conanfile.py b/conanfile.py
index 45c1e144..56646fcf 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.17"
+    version = "2.1.18"
 
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp
index 95d07f7d..3ed07b09 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/heap_chunk_selector.cpp
@@ -118,6 +118,54 @@ bool HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t v_c
     return true;
 }
 
+bool HeapChunkSelector::reset_pg_chunks(pg_id_t pg_id) {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return false;
+    }
+    {
+        auto pg_chunk_collection = pg_it->second;
+        std::scoped_lock lock(pg_chunk_collection->mtx);
+        for (auto& chunk : pg_chunk_collection->m_pg_chunks) {
+            chunk->reset();
+        }
+    }
+    return true;
+}
+
+bool HeapChunkSelector::return_pg_chunks_to_dev_heap(const pg_id_t pg_id) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return false;
+    }
+
+    auto pg_chunk_collection = pg_it->second;
+    auto pdev_id = pg_chunk_collection->m_pg_chunks[0]->get_pdev_id();
+    auto pdev_it = m_per_dev_heap.find(pdev_id);
+    RELEASE_ASSERT(pdev_it != m_per_dev_heap.end(), "pdev {} should be in per dev heap", pdev_id);
+    auto pdev_heap = pdev_it->second;
+
+    {
+        std::scoped_lock lock(pdev_heap->mtx, pg_chunk_collection->mtx);
+        for (auto& chunk : pg_chunk_collection->m_pg_chunks) {
+            if (chunk->m_state == ChunkState::INUSE) {
+                chunk->m_state = ChunkState::AVAILABLE;
+            } // an INUSE chunk belongs to a still-open shard, which should have been destroyed first
+            chunk->m_pg_id = std::nullopt;
+            chunk->m_v_chunk_id = std::nullopt;
+
+            pdev_heap->m_heap.emplace(chunk);
+            pdev_heap->available_blk_count += chunk->available_blks();
+        }
+    }
+    m_per_pg_chunks.erase(pg_it);
+    return true;
+}
+
 uint32_t HeapChunkSelector::get_chunk_size() const {
     const auto chunk = m_chunks.begin()->second;
     return chunk->size();
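
The two selector calls above are meant to run as separate phases. A minimal sketch of the intended ordering, assuming only the APIs added in this hunk (the teardown_pg_chunks wrapper itself is hypothetical and not part of the patch):

    // Hypothetical driver showing the two-phase ordering.
    void teardown_pg_chunks(homeobject::HeapChunkSelector& selector, homeobject::pg_id_t pg_id) {
        // Phase 1: wipe chunk contents while the pg -> chunk mapping still exists.
        // reset_pg_chunks() takes m_chunk_selector_mtx shared, so selection for
        // other pgs is not blocked while chunks are being reset.
        bool const reset_ok = selector.reset_pg_chunks(pg_id);

        // Phase 2: sever the mapping and push every chunk back onto its per-pdev
        // heap. This takes the mutex exclusively, since it mutates m_per_pg_chunks
        // and the heap itself.
        bool const returned = reset_ok && selector.return_pg_chunks_to_dev_heap(pg_id);
        RELEASE_ASSERT(returned, "chunk teardown failed for pg {}", pg_id);
    }
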
diff --git a/src/lib/homestore_backend/heap_chunk_selector.h b/src/lib/homestore_backend/heap_chunk_selector.h
index c0a84b00..4c4d8dbe 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.h
+++ b/src/lib/homestore_backend/heap_chunk_selector.h
@@ -79,6 +79,20 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     // It is used in two scenarios: 1. seal shard 2. create shard rollback
     bool release_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id);
 
+    bool reset_pg_chunks(pg_id_t pg_id);
+
+    /**
+     * Releases all chunks associated with the specified pg_id.
+     *
+     * This function returns all chunks that are currently associated with the
+     * pg identified by the given pg_id. It is typically used when all chunks
+     * of a pg need to be freed, for example when the pg is moved out.
+     *
+     * @param pg_id The ID of the protection group whose chunks are to be released.
+     * @return A boolean value indicating whether the operation was successful.
+     */
+    bool return_pg_chunks_to_dev_heap(pg_id_t pg_id);
+
     /**
      * select chunks for pg, chunks need to be in same pdev.
      *
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index 43da9547..bdb05d1d 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -30,6 +30,7 @@ static constexpr uint64_t io_align{512};
 PGError toPgError(homestore::ReplServiceError const&);
 BlobError toBlobError(homestore::ReplServiceError const&);
 ShardError toShardError(homestore::ReplServiceError const&);
+ENUM(PGState, uint8_t, ALIVE = 0, DESTROYED);
 
 class HSHomeObject : public HomeObjectImpl {
 private:
@@ -80,6 +81,7 @@ class HSHomeObject : public HomeObjectImpl {
 
     struct pg_info_superblk {
         pg_id_t id;
+        PGState state;
         uint32_t num_members;
         uint32_t num_chunks;
         peer_id_t replica_set_uuid;
@@ -109,6 +111,7 @@ class HSHomeObject : public HomeObjectImpl {
 
         pg_info_superblk& operator=(pg_info_superblk const& rhs) {
             id = rhs.id;
+            state = rhs.state;
             num_members = rhs.num_members;
             num_chunks = rhs.num_chunks;
             pg_size = rhs.pg_size;
@@ -411,6 +414,20 @@ class HSHomeObject : public HomeObjectImpl {
     void on_pg_replace_member(homestore::group_id_t group_id, const homestore::replica_member_info& member_out,
                               const homestore::replica_member_info& member_in);
 
+    /**
+     * @brief Cleans up and recycles resources for the PG identified by the given pg_id on the current node.
+     *
+     * This function is called when the local replication device is destroyed or when a specific PG is torn
+     * down on the current node. Note that it does not perform any Raft synchronization with other nodes.
+     *
+     * Possible scenarios for calling this function include:
+     * - A moved-out node cleaning up resources for the specified PG.
+     * - During baseline rsync, to clean up stale PG resources on the current node.
+     *
+     * @param pg_id The ID of the PG to be destroyed.
+     */
+    void pg_destroy(pg_id_t pg_id);
+
     /**
      * @brief Callback function invoked when a message is committed on a shard.
      *
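
pg_destroy() is the local-only entry point declared above. A compressed sketch of the expected call path (the handle_group_destroy helper is illustrative; the real wiring appears in ReplicationStateMachine::on_destroy later in this patch):

    // Illustrative helper, not part of the patch.
    void handle_group_destroy(homeobject::HSHomeObject& ho, homestore::group_id_t group_id) {
        if (auto pg_id = ho.get_pg_id_with_group_id(group_id); pg_id.has_value()) {
            ho.pg_destroy(*pg_id);   // local cleanup only; no Raft round-trip
        }
    }
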
@@ -488,6 +505,7 @@ class HSHomeObject : public HomeObjectImpl {
                             size_t hash_len) const;
 
     std::shared_ptr< BlobIndexTable > recover_index_table(homestore::superblk< homestore::index_table_sb >&& sb);
+    std::optional< pg_id_t > get_pg_id_with_group_id(homestore::group_id_t group_id) const;
 
 private:
     std::shared_ptr< BlobIndexTable > create_index_table();
@@ -512,6 +530,36 @@ class HSHomeObject : public HomeObjectImpl {
     sisl::io_blob_safe& get_pad_buf(uint32_t pad_len);
 
     // void trigger_timed_events();
+
+    /**
+     * @brief Marks the PG as destroyed.
+     *
+     * Updates the internal state to indicate that the specified PG is destroyed and ensures its state is persisted.
+     *
+     * @param pg_id The ID of the PG to be marked as destroyed.
+     */
+    void mark_pg_destroyed(pg_id_t pg_id);
+
+    /**
+     * @brief Cleans up and recycles resources for all shards of the PG identified by pg_id.
+     *
+     * @param pg_id The ID of the PG whose shards are to be destroyed.
+     */
+    void destroy_shards(pg_id_t pg_id);
+
+    /**
+     * @brief Resets the chunks for the given PG ID and triggers a checkpoint flush.
+     *
+     * @param pg_id The ID of the PG whose chunks are to be reset.
+     */
+    void reset_pg_chunks(pg_id_t pg_id);
+
+    /**
+     * @brief Cleans up and recycles resources for the PG identified by pg_id.
+     *
+     * @param pg_id The ID of the PG to be cleaned up.
+     */
+    void cleanup_pg_resources(pg_id_t pg_id);
 };
 
 class BlobIndexServiceCallbacks : public homestore::IndexServiceCallbacks {
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 72a3cc21..38612348 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -250,6 +250,79 @@ void HSHomeObject::on_pg_replace_member(homestore::group_id_t group_id, const re
                boost::uuids::to_string(member_in.id));
 }
 
+std::optional< pg_id_t > HSHomeObject::get_pg_id_with_group_id(homestore::group_id_t group_id) const {
+    auto lg = std::shared_lock(_pg_lock);
+    auto iter = std::find_if(_pg_map.begin(), _pg_map.end(), [group_id](const auto& entry) {
+        return pg_repl_dev(*entry.second).group_id() == group_id;
+    });
+    if (iter != _pg_map.end()) {
+        return iter->first;
+    } else {
+        return std::nullopt;
+    }
+}
+
+void HSHomeObject::pg_destroy(pg_id_t pg_id) {
+    mark_pg_destroyed(pg_id);
+    destroy_shards(pg_id);
+    reset_pg_chunks(pg_id);
+    cleanup_pg_resources(pg_id);
+    LOGI("pg {} is destroyed", pg_id);
+}
+
+void HSHomeObject::mark_pg_destroyed(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("mark_pg_destroyed called with unknown pg_id {}", pg_id);
+        return;
+    }
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    hs_pg->pg_sb_->state = PGState::DESTROYED;
+    hs_pg->pg_sb_.write();
+}
+
+void HSHomeObject::reset_pg_chunks(pg_id_t pg_id) {
+    bool res = chunk_selector_->reset_pg_chunks(pg_id);
+    RELEASE_ASSERT(res, "Failed to reset all chunks in pg {}", pg_id);
+    auto fut = homestore::hs()->cp_mgr().trigger_cp_flush(true /* force */);
+    auto on_complete = [&](auto success) {
+        RELEASE_ASSERT(success, "Failed to trigger CP flush");
+        LOGI("CP Flush completed");
+    };
+    on_complete(std::move(fut).get());
+}
+
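
reset_pg_chunks() above deliberately blocks on the forced checkpoint so that the zeroed chunk state is durable before any superblock is destroyed. The lambda indirection is equivalent to waiting on the future directly; a flatter sketch of the same wait, assuming the future-style API used in this hunk:

    // Sketch only: equivalent, flatter form of the wait in reset_pg_chunks().
    auto success = std::move(homestore::hs()->cp_mgr().trigger_cp_flush(true /* force */)).get();
    RELEASE_ASSERT(success, "Failed to trigger CP flush");
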
+void HSHomeObject::cleanup_pg_resources(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("cleanup_pg_resources called with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    // destroy index table
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    if (nullptr != hs_pg->index_table_) {
+        auto uuid_str = boost::uuids::to_string(hs_pg->index_table_->uuid());
+        index_table_pg_map_.erase(uuid_str);
+        hs()->index_service().remove_index_table(hs_pg->index_table_);
+        hs_pg->index_table_->destroy();
+    }
+
+    // destroy pg super blk
+    hs_pg->pg_sb_.destroy();
+
+    // return pg chunks to dev heap; this must happen after the pg super blk is
+    // destroyed, so that no two pgs can ever be recovered onto the same chunks
+    bool res = chunk_selector_->return_pg_chunks_to_dev_heap(pg_id);
+    RELEASE_ASSERT(res, "Failed to return pg {} chunks to dev_heap", pg_id);
+
+    // erase pg from pg map
+    _pg_map.erase(iter);
+}
+
 void HSHomeObject::add_pg_to_map(unique< HS_PG > hs_pg) {
     RELEASE_ASSERT(hs_pg->pg_info_.replica_set_uuid == hs_pg->repl_dev_->group_id(),
                    "PGInfo replica set uuid mismatch with ReplDev instance for {}",
@@ -317,9 +390,14 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     // add entry in map, so that index recovery can update the PG.
     std::scoped_lock lg(index_lock_);
     auto it = index_table_pg_map_.find(uuid_str);
-    RELEASE_ASSERT(it != index_table_pg_map_.end(), "IndexTable should be recovered before PG");
-    hs_pg->index_table_ = it->second.index_table;
-    it->second.pg_id = pg_id;
+    if (it != index_table_pg_map_.end()) {
+        hs_pg->index_table_ = it->second.index_table;
+        it->second.pg_id = pg_id;
+    } else {
+        RELEASE_ASSERT(hs_pg->pg_sb_->state == PGState::DESTROYED, "IndexTable should be recovered before PG");
+        hs_pg->index_table_ = nullptr;
+        LOGI("Index table not found for destroyed pg_id={}, index_table_uuid={}", pg_id, uuid_str);
+    }
 
     add_pg_to_map(std::move(hs_pg));
 }
@@ -349,6 +427,7 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share
     pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members) +
                   num_chunks * sizeof(homestore::chunk_num_t));
     pg_sb_->id = pg_info_.id;
+    pg_sb_->state = PGState::ALIVE;
     pg_sb_->num_members = pg_info_.members.size();
     pg_sb_->num_chunks = num_chunks;
     pg_sb_->pg_size = pg_info_.size;
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 6c0fbe32..562244bc 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -44,6 +44,7 @@ ShardError toShardError(ReplServiceError const& e) {
     }
 }
 
+
 uint64_t ShardManager::max_shard_size() { return Gi; }
 
 uint64_t ShardManager::max_shard_num_in_pg() { return ((uint64_t)0x01) << shard_width; }
@@ -526,6 +527,25 @@ bool HSHomeObject::release_chunk_based_on_create_shard_message(sisl::blob const&
     }
 }
 
+void HSHomeObject::destroy_shards(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock, _shard_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("destroy_shards called with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    auto& pg = iter->second;
+    for (auto& shard : pg->shards_) {
+        // the shard's chunk is reclaimed separately via reset_pg_chunks/return_pg_chunks_to_dev_heap
+        auto hs_shard = s_cast< HS_Shard* >(shard.get());
+        // destroy the shard super blk
+        hs_shard->sb_.destroy();
+        // erase the shard from the shard map
+        _shard_map.erase(shard->info.id);
+    }
+}
+
 HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id,
                                  homestore::chunk_num_t v_chunk_id) :
         Shard(std::move(shard_info)), sb_(_shard_meta_name) {
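
The relaxed assert in on_pg_meta_blk_found above encodes a recovery invariant worth spelling out: an ALIVE PG must find its index table, while a DESTROYED PG may legitimately have none, because pg_destroy() removes the table before the PG superblock. A sketch of the rule, using names from this patch (the helper function itself is illustrative):

    // Illustrative restatement of the recovery invariant, not part of the patch.
    bool index_state_is_consistent(homeobject::HSHomeObject::HS_PG const& hs_pg, bool index_found) {
        return index_found || hs_pg.pg_sb_->state == homeobject::PGState::DESTROYED;
    }
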
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index e0c41276..e9bf3579 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -193,8 +193,14 @@ void ReplicationStateMachine::on_replace_member(const homestore::replica_member_
 }
 
 void ReplicationStateMachine::on_destroy(const homestore::group_id_t& group_id) {
-    // TODO:: add the logic to handle destroy
-    LOGI("replica destroyed");
+    auto pg_id = home_object_->get_pg_id_with_group_id(group_id);
+    if (!pg_id.has_value()) {
+        LOGW("no pg is mapped to group_id {}", boost::uuids::to_string(group_id));
+        return;
+    }
+    home_object_->pg_destroy(pg_id.value());
+    LOGI("replica destroyed, cleared PG {} resources with group_id {}", pg_id.value(),
+         boost::uuids::to_string(group_id));
 }
 
 homestore::AsyncReplResult<>
diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
index 250016b3..63b9dba2 100644
--- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp
+++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
@@ -131,11 +131,11 @@ class HomeObjectFixture : public ::testing::Test {
         run_on_pg_leader(pg_id, [&]() {
             auto v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
             RELEASE_ASSERT(v_chunkID.has_value(), "failed to get shard v_chunk_id");
-            g_helper->set_v_chunk_id(v_chunkID.value());
+            g_helper->set_auxiliary_uint64_id(v_chunkID.value());
         });
 
         // get v_chunk_id from IPC and compare with local
-        auto leader_v_chunk_id = g_helper->get_v_chunk_id();
+        auto leader_v_chunk_id = g_helper->get_auxiliary_uint64_id();
         auto local_v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
         RELEASE_ASSERT(local_v_chunkID.has_value(), "failed to get shard v_chunk_id");
         RELEASE_ASSERT(leader_v_chunk_id == local_v_chunkID, "v_chunk_id supposed to be identical");
@@ -318,6 +318,22 @@ class HomeObjectFixture : public ::testing::Test {
         }
     }
 
+    void verify_pg_destroy(pg_id_t pg_id, const string& index_table_uuid_str,
+                           const std::vector< shard_id_t >& shard_id_vec) {
+        // check pg
+        ASSERT_FALSE(pg_exist(pg_id));
+        ASSERT_EQ(_obj_inst->index_table_pg_map_.find(index_table_uuid_str), _obj_inst->index_table_pg_map_.end());
+        // check shards
+        auto e = _obj_inst->shard_manager()->list_shards(pg_id).get();
+        ASSERT_EQ(e.error(), ShardError::UNKNOWN_PG);
+        for (const auto& shard_id : shard_id_vec) {
+            ASSERT_FALSE(shard_exist(shard_id));
+        }
+        // check chunk_selector
+        const auto& chunk_selector = _obj_inst->chunk_selector();
+        ASSERT_EQ(chunk_selector->m_per_pg_chunks.find(pg_id), chunk_selector->m_per_pg_chunks.end());
+    }
+
     void verify_hs_pg(HSHomeObject::HS_PG* lhs_pg, HSHomeObject::HS_PG* rhs_pg) {
         // verify index table
         EXPECT_EQ(lhs_pg->index_table_->uuid(), rhs_pg->index_table_->uuid());
diff --git a/src/lib/homestore_backend/tests/hs_blob_tests.cpp b/src/lib/homestore_backend/tests/hs_blob_tests.cpp
index c4685575..75834e36 100644
--- a/src/lib/homestore_backend/tests/hs_blob_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_blob_tests.cpp
@@ -14,6 +14,9 @@ TEST_F(HomeObjectFixture, BasicEquivalence) {
 }
 
 TEST_F(HomeObjectFixture, BasicPutGetDelBlobWRestart) {
+    // first, test recovery from a pristine state
+    restart();
+
     auto num_pgs = SISL_OPTIONS["num_pgs"].as< uint64_t >();
     auto num_shards_per_pg = SISL_OPTIONS["num_shards"].as< uint64_t >() / num_pgs;
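
verify_pg_destroy() above bundles the post-destroy assertions into one helper. A sketch of the intended call pattern, mirroring the ReplaceMember test later in this patch (the surrounding variables are assumed to exist in the test body):

    // Capture the index table uuid while the pg still exists...
    auto uuid_str = boost::uuids::to_string(hs_pg->pg_sb_->index_table_uuid);
    // ...let the member be replaced and the local replica destroyed...
    // ...then assert that every trace of the pg is gone.
    verify_pg_destroy(pg_id, uuid_str, shard_ids);
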
diff --git a/src/lib/homestore_backend/tests/hs_pg_tests.cpp b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
index b9ac28cb..22f2f733 100644
--- a/src/lib/homestore_backend/tests/hs_pg_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
@@ -124,6 +124,11 @@ TEST_F(HomeObjectFixture, PGSizeLessThanChunkTest) {
 }
 
 TEST_F(HomeObjectFixture, PGRecoveryTest) {
+    auto id = _obj_inst->our_uuid();
+    // first, test recovery from a pristine state
+    restart();
+    EXPECT_EQ(id, _obj_inst->our_uuid());
+
     // create 10 pg
     for (pg_id_t i = 1; i < 11; i++) {
         pg_id_t pg_id{i};
@@ -131,12 +136,8 @@ TEST_F(HomeObjectFixture, PGRecoveryTest) {
     }
 
     // get pg map
-    HSHomeObject* ho = dynamic_cast< HSHomeObject* >(_obj_inst.get());
     std::map< pg_id_t, std::unique_ptr< PG > > pg_map;
-    pg_map.swap(ho->_pg_map);
-
-    // get uuid
-    auto id = ho->our_uuid();
+    pg_map.swap(_obj_inst->_pg_map);
 
     // restart
     restart();
diff --git a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
index 549655a6..f8784f7d 100644
--- a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
+++ b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
@@ -43,7 +43,6 @@ namespace bip = boost::interprocess;
 using namespace homeobject;
 
 #define INVALID_UINT64_ID UINT64_MAX
-#define INVALID_CHUNK_NUM UINT16_MAX
 
 namespace test_common {
 
@@ -57,7 +56,7 @@ class HSReplTestHelper {
                 sync_point_num_ = sync_point;
                 homeobject_replica_count_ = 0;
                 uint64_id_ = INVALID_UINT64_ID;
-                v_chunk_id_ = INVALID_CHUNK_NUM;
+                auxiliary_uint64_id_ = UINT64_MAX;
                 cv_.notify_all();
             } else {
                 cv_.wait(lg, [this, sync_point]() { return sync_point_num_ == sync_point; });
@@ -74,14 +73,13 @@ class HSReplTestHelper {
             return uint64_id_;
         }
 
-        void set_v_chunk_id(homestore::chunk_num_t input_v_chunk_id) {
+        void set_auxiliary_uint64_id(uint64_t input_auxiliary_uint64_id) {
             std::unique_lock< bip::interprocess_mutex > lg(mtx_);
-            v_chunk_id_ = input_v_chunk_id;
+            auxiliary_uint64_id_ = input_auxiliary_uint64_id;
         }
-
-        homestore::chunk_num_t get_v_chunk_id() {
+        uint64_t get_auxiliary_uint64_id() {
             std::unique_lock< bip::interprocess_mutex > lg(mtx_);
-            return v_chunk_id_;
+            return auxiliary_uint64_id_;
         }
 
     private:
@@ -89,11 +87,9 @@ class HSReplTestHelper {
         bip::interprocess_condition cv_;
         uint8_t homeobject_replica_count_{0};
 
-        // the following variables are used to share shard_id and blob_id among different replicas
+        // the following variables are used to share shard_id, blob_id and other values among different replicas
         uint64_t uint64_id_{0};
-
-        // used to verify identical layout
-        homestore::chunk_num_t v_chunk_id_{0};
+        uint64_t auxiliary_uint64_id_{0};
 
         // the nth synchronization point, that is how many times different replicas have synced
         uint64_t sync_point_num_{UINT64_MAX};
@@ -271,9 +267,10 @@ class HSReplTestHelper {
     void sync() { ipc_data_->sync(sync_point_num++, total_replicas_nums_); }
     void set_uint64_id(uint64_t uint64_id) { ipc_data_->set_uint64_id(uint64_id); }
     uint64_t get_uint64_id() { return ipc_data_->get_uint64_id(); }
-    void set_v_chunk_id(homestore::chunk_num_t v_chunk_id) { ipc_data_->set_v_chunk_id(v_chunk_id); }
-    homestore::chunk_num_t get_v_chunk_id() { return ipc_data_->get_v_chunk_id(); }
-
+    void set_auxiliary_uint64_id(uint64_t input_auxiliary_uint64_id) {
+        ipc_data_->set_auxiliary_uint64_id(input_auxiliary_uint64_id);
+    }
+    uint64_t get_auxiliary_uint64_id() { return ipc_data_->get_auxiliary_uint64_id(); }
     void check_and_kill(int port) {
         std::string command = "lsof -t -i:" + std::to_string(port);
         if (::system(command.c_str())) {
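
The renamed IPC slot is now a general-purpose uint64 channel rather than a chunk-id-specific one (widening chunk_num_t to uint64_t is lossless). A sketch of the round trip the fixture performs with it, taken from the homeobj_fixture.hpp hunk earlier in this patch:

    // Leader publishes its v_chunk_id through the shared-memory slot...
    g_helper->set_auxiliary_uint64_id(v_chunkID.value());
    // ...and every follower reads it back to compare against its local layout.
    auto leader_v_chunk_id = g_helper->get_auxiliary_uint64_id();
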
diff --git a/src/lib/homestore_backend/tests/hs_shard_tests.cpp b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
index 884065d6..c16b8ed6 100644
--- a/src/lib/homestore_backend/tests/hs_shard_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
@@ -171,6 +171,9 @@ TEST_F(HomeObjectFixture, ShardManagerRecovery) {
 }
 
 TEST_F(HomeObjectFixture, SealedShardRecovery) {
+    // first, test recovery from a pristine state
+    restart();
+
     pg_id_t pg_id{1};
     create_pg(pg_id);
 
diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
index 0251e4fb..e1ff6acd 100644
--- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
@@ -73,7 +73,7 @@ uint16_t VChunk::get_chunk_id() const { return m_internal_chunk->get_chunk_id();
 blk_num_t VChunk::get_total_blks() const { return m_internal_chunk->get_total_blks(); }
 
 uint64_t VChunk::size() const { return m_internal_chunk->size(); }
-
+void VChunk::reset() {}
 cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk; }
 
 } // namespace homestore
@@ -276,6 +276,18 @@ TEST_F(HeapChunkSelectorTest, test_select_specific_chunk_and_release_chunk) {
     }
 }
 
+TEST_F(HeapChunkSelectorTest, test_return_pg_chunks) {
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        ASSERT_TRUE(HCS.return_pg_chunks_to_dev_heap(pg_id));
+        ASSERT_EQ(HCS.m_per_pg_chunks.find(pg_id), HCS.m_per_pg_chunks.end());
+        ASSERT_EQ(HCS.m_per_dev_heap[pg_id]->available_blk_count, 1 + 2 + 3);
+        ASSERT_EQ(HCS.m_per_dev_heap[pg_id]->available_blk_count, HCS.m_per_dev_heap[pg_id]->m_total_blks);
+    }
+    for (const auto& [_, chunk] : HCS.m_chunks) {
+        ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+    }
+}
+
 TEST_F(HeapChunkSelectorTest, test_recovery) {
     HeapChunkSelector HCS_recovery;
     HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 1, 1, 9));
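
test_return_pg_chunks above leans on the fixture's layout, where pg i appears to sit alone on pdev i and own chunks exposing 1, 2 and 3 available blocks (an assumption inferred from the 1 + 2 + 3 expectation and from indexing m_per_dev_heap by pg_id). The layout-independent part of the check, restated as a standalone sketch (the helper is illustrative):

    // Sketch: after a pg's chunks are returned, the pdev heap must account
    // for all of that pdev's blocks again.
    void check_pdev_fully_free(HeapChunkSelector& hcs, uint16_t pdev_id) {
        auto const& heap = hcs.m_per_dev_heap[pdev_id];
        ASSERT_EQ(heap->available_blk_count, heap->m_total_blks);
    }
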
diff --git a/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
index 681782b6..5231983e 100644
--- a/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
+++ b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
@@ -66,6 +66,15 @@ TEST_F(HomeObjectFixture, ReplaceMember) {
     auto out_member_id = g_helper->replica_id(num_replicas - 1);
     auto in_member_id = g_helper->replica_id(num_replicas); /*spare replica*/
 
+    // get the out member's index_table_uuid for this pg_id
+    string index_table_uuid_str;
+    if (out_member_id == g_helper->my_replica_id()) {
+        auto iter = _obj_inst->_pg_map.find(pg_id);
+        RELEASE_ASSERT(iter != _obj_inst->_pg_map.end(), "PG not found");
+        auto hs_pg = static_cast< homeobject::HSHomeObject::HS_PG* >(iter->second.get());
+        index_table_uuid_str = boost::uuids::to_string(hs_pg->pg_sb_->index_table_uuid);
+    }
+
     run_on_pg_leader(pg_id, [&]() {
         auto r = _obj_inst->pg_manager()
                      ->replace_member(pg_id, out_member_id, PGMember{in_member_id, "new_member", 0})
@@ -90,6 +99,40 @@ TEST_F(HomeObjectFixture, ReplaceMember) {
         verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
         verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
     });
+
+    // step 5: verify that no pg-related data remains on the out member
+    if (out_member_id == g_helper->my_replica_id()) {
+        while (am_i_in_pg(pg_id)) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+            LOGINFO("old member is waiting to leave pg {}", pg_id);
+        }
+
+        verify_pg_destroy(pg_id, index_table_uuid_str, pg_shard_id_vec[pg_id]);
+        // since the out member no longer owns any pg in this case, we can check every chunk
+        for (const auto& [_, chunk] : _obj_inst->chunk_selector()->m_chunks) {
+            ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(chunk->available_blks(), chunk->get_total_blks());
+        }
+        LOGINFO("verified that no pg-related data remains on the out member");
+    }
+
+    // step 6: restart, then verify the blobs again on all members, including the new spare replica and the out member
+    restart();
+    run_if_in_pg(pg_id, [&]() {
+        verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
+        verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
+        LOGINFO("after restart, verified pg-related data on pg members");
+    });
+
+    if (out_member_id == g_helper->my_replica_id()) {
+        verify_pg_destroy(pg_id, index_table_uuid_str, pg_shard_id_vec[pg_id]);
+        // since the out member no longer owns any pg in this case, we can check every chunk
+        for (const auto& [_, chunk] : _obj_inst->chunk_selector()->m_chunks) {
+            ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(chunk->available_blks(), chunk->get_total_blks());
+        }
+        LOGINFO("after restart, verified that no pg-related data remains on the out member");
+    }
 }
 
 SISL_OPTION_GROUP(

From 067612f0ca24cb17fcc2f3212b65c9db671a65f5 Mon Sep 17 00:00:00 2001
From: yuwmao <148639999+yuwmao@users.noreply.github.com>
Date: Tue, 24 Dec 2024 15:12:23 +0800
Subject: [PATCH 2/2] Adaptive changes for homestore/6.6.x (#246)

* Avoid returning nullptr from cp_switchover. HS will assume the consumer
  is not participating in the CP if nullptr is returned.

Signed-off-by: Xiaoxi Chen

* Handle ReplServiceError::DATA_DUPLICATED

---------

Signed-off-by: Xiaoxi Chen
Co-authored-by: Xiaoxi Chen
---
 conanfile.py                                  | 2 +-
 src/lib/homestore_backend/hs_cp_callbacks.cpp | 4 +++-
 src/lib/homestore_backend/hs_pg_manager.cpp   | 5 ++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/conanfile.py b/conanfile.py
index 56646fcf..bd8e53cd 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.18"
+    version = "2.1.19"
 
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
diff --git a/src/lib/homestore_backend/hs_cp_callbacks.cpp b/src/lib/homestore_backend/hs_cp_callbacks.cpp
index f50ce386..f54a0edd 100644
--- a/src/lib/homestore_backend/hs_cp_callbacks.cpp
+++ b/src/lib/homestore_backend/hs_cp_callbacks.cpp
@@ -23,7 +23,9 @@ using homestore::CPContext;
 
 namespace homeobject {
 
-std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* cur_cp, CP* new_cp) { return nullptr; }
+std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* cur_cp, CP* new_cp) {
+    return std::make_unique< CPContext >(new_cp);
+}
 
 // when cp_flush is called, it means that all the dirty candidates are already in the dirty list.
 // new dirty candidates will arrive on next cp's context.
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 38612348..d612ba94 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -46,10 +46,13 @@ PGError toPgError(ReplServiceError const& e) {
     case ReplServiceError::OK:
         DEBUG_ASSERT(false, "Should not process OK!");
         [[fallthrough]];
+    case ReplServiceError::DATA_DUPLICATED:
+        [[fallthrough]];
     case ReplServiceError::FAILED:
         return PGError::UNKNOWN;
+    default:
+        return PGError::UNKNOWN;
     }
-    return PGError::UNKNOWN;
 }
 
 [[maybe_unused]] static homestore::ReplDev& pg_repl_dev(PG const& pg) {
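
The reworked toPgError switch in the last hunk replaces the unreachable trailing return with a default branch. Condensed, the branch shown here behaves like the following sketch (illustration only; other ReplServiceError codes are handled in the elided part of the switch):

    // Condensed behavior of the hunk above: the new DATA_DUPLICATED code,
    // FAILED, and any code not explicitly listed all collapse to UNKNOWN.
    PGError map_repl_error(homestore::ReplServiceError e) {
        switch (e) {
        case homestore::ReplServiceError::DATA_DUPLICATED:
        case homestore::ReplServiceError::FAILED:
        default:
            return PGError::UNKNOWN;
        }
    }
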