diff --git a/conanfile.py b/conanfile.py
index 4d79d90..3e649fe 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.17"
+    version = "2.1.19"
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp
index 95d07f7..3ed07b0 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/heap_chunk_selector.cpp
@@ -118,6 +118,54 @@ bool HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t v_c
     return true;
 }
 
+bool HeapChunkSelector::reset_pg_chunks(pg_id_t pg_id) {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return false;
+    }
+    {
+        auto pg_chunk_collection = pg_it->second;
+        std::scoped_lock lock(pg_chunk_collection->mtx);
+        for (auto& chunk : pg_chunk_collection->m_pg_chunks) {
+            chunk->reset();
+        }
+    }
+    return true;
+}
+
+bool HeapChunkSelector::return_pg_chunks_to_dev_heap(const pg_id_t pg_id) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return false;
+    }
+
+    auto pg_chunk_collection = pg_it->second;
+    auto pdev_id = pg_chunk_collection->m_pg_chunks[0]->get_pdev_id();
+    auto pdev_it = m_per_dev_heap.find(pdev_id);
+    RELEASE_ASSERT(pdev_it != m_per_dev_heap.end(), "pdev {} should be in per dev heap", pdev_id);
+    auto pdev_heap = pdev_it->second;
+
+    {
+        std::scoped_lock lock(pdev_heap->mtx, pg_chunk_collection->mtx);
+        for (auto& chunk : pg_chunk_collection->m_pg_chunks) {
+            if (chunk->m_state == ChunkState::INUSE) {
+                chunk->m_state = ChunkState::AVAILABLE;
+            } // an INUSE chunk still hosts an open shard, which should have been released first
+            chunk->m_pg_id = std::nullopt;
+            chunk->m_v_chunk_id = std::nullopt;
+
+            pdev_heap->m_heap.emplace(chunk);
+            pdev_heap->available_blk_count += chunk->available_blks();
+        }
+    }
+    m_per_pg_chunks.erase(pg_it);
+    return true;
+}
+
 uint32_t HeapChunkSelector::get_chunk_size() const {
     const auto chunk = m_chunks.begin()->second;
     return chunk->size();
diff --git a/src/lib/homestore_backend/heap_chunk_selector.h b/src/lib/homestore_backend/heap_chunk_selector.h
index c0a84b0..4c4d8db 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.h
+++ b/src/lib/homestore_backend/heap_chunk_selector.h
@@ -79,6 +79,20 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     // It is used in two scenarios: 1. seal shard 2. create shard rollback
     bool release_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id);
 
+    bool reset_pg_chunks(pg_id_t pg_id);
+
+    /**
+     * Releases all chunks associated with the specified pg_id.
+     *
+     * This function is used to return all chunks that are currently associated with a particular
+     * pg identified by the given pg_id. It is typically used in scenarios where
+     * all chunks associated with a pg need to be freed, such as pg move out.
+     *
+     * @param pg_id The ID of the protection group whose chunks are to be released.
+     * @return A boolean value indicating whether the operation was successful.
+     */
+    bool return_pg_chunks_to_dev_heap(pg_id_t pg_id);
+
     /**
      * select chunks for pg, chunks need to be in same pdev.
      *
diff --git a/src/lib/homestore_backend/hs_cp_callbacks.cpp b/src/lib/homestore_backend/hs_cp_callbacks.cpp
index f50ce38..f54a0ed 100644
--- a/src/lib/homestore_backend/hs_cp_callbacks.cpp
+++ b/src/lib/homestore_backend/hs_cp_callbacks.cpp
@@ -23,7 +23,9 @@ using homestore::CPContext;
 
 namespace homeobject {
 
-std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* cur_cp, CP* new_cp) { return nullptr; }
+std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* cur_cp, CP* new_cp) {
+    return std::make_unique< CPContext >(new_cp);
+}
 
 // when cp_flush is called, it means that all the dirty candidates are already in the dirty list.
 // new dirty candidates will arrive on next cp's context.
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index 4f3be36..3411c1d 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -33,6 +33,7 @@ static constexpr uint64_t io_align{512};
 PGError toPgError(homestore::ReplServiceError const&);
 BlobError toBlobError(homestore::ReplServiceError const&);
 ShardError toShardError(homestore::ReplServiceError const&);
+ENUM(PGState, uint8_t, ALIVE = 0, DESTROYED);
 
 class HSHomeObject : public HomeObjectImpl {
 private:
@@ -83,6 +84,7 @@ class HSHomeObject : public HomeObjectImpl {
 
     struct pg_info_superblk {
         pg_id_t id;
+        PGState state;
         uint32_t num_members;
         uint32_t num_chunks;
         peer_id_t replica_set_uuid;
@@ -112,6 +114,7 @@ class HSHomeObject : public HomeObjectImpl {
 
         pg_info_superblk& operator=(pg_info_superblk const& rhs) {
             id = rhs.id;
+            state = rhs.state;
             num_members = rhs.num_members;
             num_chunks = rhs.num_chunks;
             pg_size = rhs.pg_size;
@@ -483,6 +486,20 @@ class HSHomeObject : public HomeObjectImpl {
     void on_pg_replace_member(homestore::group_id_t group_id, const homestore::replica_member_info& member_out,
                               const homestore::replica_member_info& member_in);
 
+    /**
+     * @brief Cleans up and recycles resources for the PG identified by the given pg_id on the current node.
+     *
+     * This function is called when the replication device leaves or when a specific PG is destroyed on the current
+     * node. Note that this function does not perform Raft synchronization with other nodes.
+     *
+     * Possible scenarios for calling this function include:
+     * - A member-out node cleaning up resources for a specified PG.
+     * - During baseline rsync to clean up PG resources on the current node.
+     *
+     * @param pg_id The ID of the PG to be destroyed.
+     */
+    void pg_destroy(pg_id_t pg_id);
+
     /**
      * @brief Callback function invoked when a message is committed on a shard.
      *
@@ -569,6 +586,7 @@ class HSHomeObject : public HomeObjectImpl {
                     size_t hash_len) const;
 
     std::shared_ptr< BlobIndexTable > recover_index_table(homestore::superblk< homestore::index_table_sb >&& sb);
+    std::optional< pg_id_t > get_pg_id_with_group_id(homestore::group_id_t group_id) const;
 
 private:
     std::shared_ptr< BlobIndexTable > create_index_table();
@@ -593,6 +611,36 @@ class HSHomeObject : public HomeObjectImpl {
     sisl::io_blob_safe& get_pad_buf(uint32_t pad_len);
 
     // void trigger_timed_events();
+
+    /**
+     * @brief Marks the PG as destroyed.
+     *
+     * Updates the internal state to indicate that the specified PG is destroyed and ensures its state is persisted.
+     *
+     * @param pg_id The ID of the PG to be marked as destroyed.
+     */
+    void mark_pg_destroyed(pg_id_t pg_id);
+
+    /**
+     * @brief Cleans up and recycles resources for shards in the PG located using a PG ID.
+     *
+     * @param pg_id The ID of the PG whose shards are to be destroyed.
+     */
+    void destroy_shards(pg_id_t pg_id);
+
+    /**
+     * @brief Resets the chunks for the given PG ID and triggers a checkpoint flush.
+     *
+     * @param pg_id The ID of the PG whose chunks are to be reset.
+     */
+    void reset_pg_chunks(pg_id_t pg_id);
+
+    /**
+     * @brief Cleans up and recycles resources for the PG located using a pg_id.
+     *
+     * @param pg_id The ID of the PG to be cleaned.
+     */
+    void cleanup_pg_resources(pg_id_t pg_id);
 };
 
 class BlobIndexServiceCallbacks : public homestore::IndexServiceCallbacks {
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 62d593e..da77684 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -47,10 +47,13 @@ PGError toPgError(ReplServiceError const& e) {
     case ReplServiceError::OK:
         DEBUG_ASSERT(false, "Should not process OK!");
         [[fallthrough]];
+    case ReplServiceError::DATA_DUPLICATED:
+        [[fallthrough]];
     case ReplServiceError::FAILED:
         return PGError::UNKNOWN;
+    default:
+        return PGError::UNKNOWN;
     }
-    return PGError::UNKNOWN;
 }
 
 [[maybe_unused]] static homestore::ReplDev& pg_repl_dev(PG const& pg) {
@@ -265,6 +268,79 @@ void HSHomeObject::on_pg_replace_member(homestore::group_id_t group_id, const re
                       boost::uuids::to_string(member_in.id));
 }
 
+std::optional< pg_id_t > HSHomeObject::get_pg_id_with_group_id(homestore::group_id_t group_id) const {
+    auto lg = std::shared_lock(_pg_lock);
+    auto iter = std::find_if(_pg_map.begin(), _pg_map.end(), [group_id](const auto& entry) {
+        return pg_repl_dev(*entry.second).group_id() == group_id;
+    });
+    if (iter != _pg_map.end()) {
+        return iter->first;
+    } else {
+        return std::nullopt;
+    }
+}
+
+void HSHomeObject::pg_destroy(pg_id_t pg_id) {
+    mark_pg_destroyed(pg_id);
+    destroy_shards(pg_id);
+    reset_pg_chunks(pg_id);
+    cleanup_pg_resources(pg_id);
+    LOGI("pg {} is destroyed", pg_id);
+}
+void HSHomeObject::mark_pg_destroyed(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on pg destroy with unknown pg_id {}", pg_id);
+        return;
+    }
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    hs_pg->pg_sb_->state = PGState::DESTROYED;
+    hs_pg->pg_sb_.write();
+}
+
+void HSHomeObject::reset_pg_chunks(pg_id_t pg_id) {
+    bool res = chunk_selector_->reset_pg_chunks(pg_id);
+    RELEASE_ASSERT(res, "Failed to reset all chunks in pg {}", pg_id);
+    auto fut = homestore::hs()->cp_mgr().trigger_cp_flush(true /* force */);
+    auto on_complete = [&](auto success) {
+        RELEASE_ASSERT(success, "Failed to trigger CP flush");
+        LOGI("CP Flush completed");
+    };
+    on_complete(std::move(fut).get());
+}
+
+void HSHomeObject::cleanup_pg_resources(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on pg resource release with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    // destroy index table
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    if (nullptr != hs_pg->index_table_) {
+        auto uuid_str = boost::uuids::to_string(hs_pg->index_table_->uuid());
+        index_table_pg_map_.erase(uuid_str);
+        hs()->index_service().remove_index_table(hs_pg->index_table_);
+        hs_pg->index_table_->destroy();
+    }
+
+    // destroy pg super blk
+    hs_pg->pg_sb_.destroy();
+
+    // return pg chunks to dev heap,
+    // which must be done after destroying pg super blk to avoid multiple pgs using the same chunks
+    bool res = chunk_selector_->return_pg_chunks_to_dev_heap(pg_id);
+    RELEASE_ASSERT(res, "Failed to return pg {} chunks to dev_heap", pg_id);
+
+    // erase pg in pg map
+    _pg_map.erase(iter);
+}
+
 void HSHomeObject::add_pg_to_map(unique< HS_PG > hs_pg) {
     RELEASE_ASSERT(hs_pg->pg_info_.replica_set_uuid == hs_pg->repl_dev_->group_id(),
                    "PGInfo replica set uuid mismatch with ReplDev instance for {}",
@@ -332,9 +408,14 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     // add entry in map, so that index recovery can update the PG.
     std::scoped_lock lg(index_lock_);
     auto it = index_table_pg_map_.find(uuid_str);
-    RELEASE_ASSERT(it != index_table_pg_map_.end(), "IndexTable should be recovered before PG");
-    hs_pg->index_table_ = it->second.index_table;
-    it->second.pg_id = pg_id;
+    if (it != index_table_pg_map_.end()) {
+        hs_pg->index_table_ = it->second.index_table;
+        it->second.pg_id = pg_id;
+    } else {
+        RELEASE_ASSERT(hs_pg->pg_sb_->state == PGState::DESTROYED, "IndexTable should be recovered before PG");
+        hs_pg->index_table_ = nullptr;
+        LOGI("Index table not found for destroyed pg_id={}, index_table_uuid={}", pg_id, uuid_str);
+    }
 
     add_pg_to_map(std::move(hs_pg));
 }
@@ -364,6 +445,7 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share
     pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members) +
                   num_chunks * sizeof(homestore::chunk_num_t));
     pg_sb_->id = pg_info_.id;
+    pg_sb_->state = PGState::ALIVE;
     pg_sb_->num_members = pg_info_.members.size();
     pg_sb_->num_chunks = num_chunks;
     pg_sb_->pg_size = pg_info_.size;
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 6542e8b..e4ad367 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -44,6 +44,7 @@ ShardError toShardError(ReplServiceError const& e) {
     }
 }
 
+
 uint64_t ShardManager::max_shard_size() { return Gi; }
 
 uint64_t ShardManager::max_shard_num_in_pg() { return ((uint64_t)0x01) << shard_width; }
@@ -544,6 +545,25 @@ bool HSHomeObject::release_chunk_based_on_create_shard_message(sisl::blob const&
     }
 }
 
+void HSHomeObject::destroy_shards(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock, _shard_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on shards destroy with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    auto& pg = iter->second;
+    for (auto& shard : pg->shards_) {
+        // release open shard v_chunk
+        auto hs_shard = s_cast< HS_Shard* >(shard.get());
+        // destroy shard super blk
+        hs_shard->sb_.destroy();
+        // erase shard in shard map
+        _shard_map.erase(shard->info.id);
+    }
+}
+
 HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id,
                                  homestore::chunk_num_t v_chunk_id) :
         Shard(std::move(shard_info)), sb_(_shard_meta_name) {
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 9a12f05..f235560 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -193,8 +193,14 @@ void ReplicationStateMachine::on_replace_member(const homestore::replica_member_
 }
 
 void ReplicationStateMachine::on_destroy(const homestore::group_id_t& group_id) {
-    // TODO:: add the logic to handle destroy
-    LOGI("replica destroyed");
+    auto PG_ID = home_object_->get_pg_id_with_group_id(group_id);
+    if (!PG_ID.has_value()) {
+        LOGW("No pg found for group_id {}", boost::uuids::to_string(group_id));
+        return;
+    }
+    home_object_->pg_destroy(PG_ID.value());
+    LOGI("replica destroyed, cleared PG {} resources with group_id {}", PG_ID.value(),
+         boost::uuids::to_string(group_id));
 }
 
 homestore::AsyncReplResult<>
diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
index 8daa07e..23a11a1 100644
--- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp
+++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
@@ -131,11 +131,11 @@ class HomeObjectFixture : public ::testing::Test {
         run_on_pg_leader(pg_id, [&]() {
             auto v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
             RELEASE_ASSERT(v_chunkID.has_value(), "failed to get shard v_chunk_id");
-            g_helper->set_v_chunk_id(v_chunkID.value());
+            g_helper->set_auxiliary_uint64_id(v_chunkID.value());
         });
 
         // get v_chunk_id from IPC and compare with local
-        auto leader_v_chunk_id = g_helper->get_v_chunk_id();
+        auto leader_v_chunk_id = g_helper->get_auxiliary_uint64_id();
         auto local_v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
         RELEASE_ASSERT(local_v_chunkID.has_value(), "failed to get shard v_chunk_id");
         RELEASE_ASSERT(leader_v_chunk_id == local_v_chunkID, "v_chunk_id supposed to be identical");
@@ -332,6 +332,22 @@ class HomeObjectFixture : public ::testing::Test {
         }
     }
 
+    void verify_pg_destroy(pg_id_t pg_id, const string& index_table_uuid_str,
+                           const std::vector< shard_id_t >& shard_id_vec) {
+        // check pg
+        ASSERT_FALSE(pg_exist(pg_id));
+        ASSERT_EQ(_obj_inst->index_table_pg_map_.find(index_table_uuid_str), _obj_inst->index_table_pg_map_.end());
+        // check shards
+        auto e = _obj_inst->shard_manager()->list_shards(pg_id).get();
+        ASSERT_EQ(e.error(), ShardError::UNKNOWN_PG);
+        for (const auto& shard_id : shard_id_vec) {
+            ASSERT_FALSE(shard_exist(shard_id));
+        }
+        // check chunk_selector
+        const auto& chunk_selector = _obj_inst->chunk_selector();
+        ASSERT_EQ(chunk_selector->m_per_pg_chunks.find(pg_id), chunk_selector->m_per_pg_chunks.end());
+    }
+
     void verify_hs_pg(HSHomeObject::HS_PG* lhs_pg, HSHomeObject::HS_PG* rhs_pg) {
         // verify index table
         EXPECT_EQ(lhs_pg->index_table_->uuid(), rhs_pg->index_table_->uuid());
diff --git a/src/lib/homestore_backend/tests/hs_blob_tests.cpp b/src/lib/homestore_backend/tests/hs_blob_tests.cpp
index 4dd417b..2d2289f 100644
--- a/src/lib/homestore_backend/tests/hs_blob_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_blob_tests.cpp
@@ -15,6 +15,9 @@ TEST_F(HomeObjectFixture, BasicEquivalence) {
 }
 
 TEST_F(HomeObjectFixture, BasicPutGetDelBlobWRestart) {
+    // test recovery with pristine state firstly
+    restart();
+
     auto num_pgs = SISL_OPTIONS["num_pgs"].as< uint64_t >();
     auto num_shards_per_pg = SISL_OPTIONS["num_shards"].as< uint64_t >() / num_pgs;
diff --git a/src/lib/homestore_backend/tests/hs_pg_tests.cpp b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
index b9ac28c..22f2f73 100644
--- a/src/lib/homestore_backend/tests/hs_pg_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
@@ -124,6 +124,11 @@ TEST_F(HomeObjectFixture, PGSizeLessThanChunkTest) {
 }
 
 TEST_F(HomeObjectFixture, PGRecoveryTest) {
+    auto id = _obj_inst->our_uuid();
+    // test recovery with pristine state firstly
+    restart();
+    EXPECT_EQ(id, _obj_inst->our_uuid());
+
     // create 10 pg
     for (pg_id_t i = 1; i < 11; i++) {
         pg_id_t pg_id{i};
     }
 
     // get pg map
-    HSHomeObject* ho = dynamic_cast< HSHomeObject* >(_obj_inst.get());
     std::map< pg_id_t, std::unique_ptr< PG > > pg_map;
-    pg_map.swap(ho->_pg_map);
-
-    // get uuid
-    auto id = ho->our_uuid();
+    pg_map.swap(_obj_inst->_pg_map);
 
     // restart
     restart();
diff --git a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
index 549655a..f8784f7 100644
--- a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
+++ b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
@@ -43,7 +43,6 @@ namespace bip = boost::interprocess;
 using namespace homeobject;
 
 #define INVALID_UINT64_ID UINT64_MAX
-#define INVALID_CHUNK_NUM UINT16_MAX
 
 namespace test_common {
 
@@ -57,7 +56,7 @@ class HSReplTestHelper {
             sync_point_num_ = sync_point;
             homeobject_replica_count_ = 0;
             uint64_id_ = INVALID_UINT64_ID;
-            v_chunk_id_ = INVALID_CHUNK_NUM;
+            auxiliary_uint64_id_ = UINT64_MAX;
             cv_.notify_all();
         } else {
             cv_.wait(lg, [this, sync_point]() { return sync_point_num_ == sync_point; });
         }
@@ -74,14 +73,13 @@ class HSReplTestHelper {
            return uint64_id_;
        }
 
-        void set_v_chunk_id(homestore::chunk_num_t input_v_chunk_id) {
+        void set_auxiliary_uint64_id(uint64_t input_auxiliary_uint64_id) {
            std::unique_lock< bip::interprocess_mutex > lg(mtx_);
-            v_chunk_id_ = input_v_chunk_id;
+            auxiliary_uint64_id_ = input_auxiliary_uint64_id;
        }
-
-        homestore::chunk_num_t get_v_chunk_id() {
+        uint64_t get_auxiliary_uint64_id() {
            std::unique_lock< bip::interprocess_mutex > lg(mtx_);
-            return v_chunk_id_;
+            return auxiliary_uint64_id_;
        }
 
    private:
@@ -89,11 +87,9 @@ class HSReplTestHelper {
        bip::interprocess_condition cv_;
        uint8_t homeobject_replica_count_{0};
 
-        // the following variables are used to share shard_id and blob_id among different replicas
+        // the following variables are used to share shard_id, blob_id and others among different replicas
        uint64_t uint64_id_{0};
-
-        // used to verify identical layout
-        homestore::chunk_num_t v_chunk_id_{0};
+        uint64_t auxiliary_uint64_id_{0};
 
        // the nth synchronization point, that is how many times different replicas have synced
        uint64_t sync_point_num_{UINT64_MAX};
@@ -271,9 +267,10 @@ class HSReplTestHelper {
    void sync() { ipc_data_->sync(sync_point_num++, total_replicas_nums_); }
    void set_uint64_id(uint64_t uint64_id) { ipc_data_->set_uint64_id(uint64_id); }
    uint64_t get_uint64_id() { return ipc_data_->get_uint64_id(); }
-    void set_v_chunk_id(homestore::chunk_num_t v_chunk_id) { ipc_data_->set_v_chunk_id(v_chunk_id); }
-    homestore::chunk_num_t get_v_chunk_id() { return ipc_data_->get_v_chunk_id(); }
-
+    void set_auxiliary_uint64_id(uint64_t input_auxiliary_uint64_id) {
+        ipc_data_->set_auxiliary_uint64_id(input_auxiliary_uint64_id);
+    }
+    uint64_t get_auxiliary_uint64_id() { return ipc_data_->get_auxiliary_uint64_id(); }
    void check_and_kill(int port) {
        std::string command = "lsof -t -i:" + std::to_string(port);
        if (::system(command.c_str())) {
diff --git a/src/lib/homestore_backend/tests/hs_shard_tests.cpp b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
index 884065d..c16b8ed 100644
--- a/src/lib/homestore_backend/tests/hs_shard_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
@@ -171,6 +171,9 @@ TEST_F(HomeObjectFixture, ShardManagerRecovery) {
 }
 
 TEST_F(HomeObjectFixture, SealedShardRecovery) {
+    // test recovery with pristine state firstly
+    restart();
+
     pg_id_t pg_id{1};
     create_pg(pg_id);
diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
index 0251e4f..e1ff6ac 100644
--- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
@@ -73,7 +73,7 @@ uint16_t VChunk::get_chunk_id() const { return m_internal_chunk->get_chunk_id();
 blk_num_t VChunk::get_total_blks() const { return m_internal_chunk->get_total_blks(); }
 
 uint64_t VChunk::size() const { return m_internal_chunk->size(); }
-
+void VChunk::reset() {}
 cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk; }
 
 } // namespace homestore
@@ -276,6 +288,18 @@ TEST_F(HeapChunkSelectorTest, test_select_specific_chunk_and_release_chunk) {
     }
 }
 
+TEST_F(HeapChunkSelectorTest, test_return_pg_chunks) {
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        ASSERT_TRUE(HCS.return_pg_chunks_to_dev_heap(pg_id));
+        ASSERT_EQ(HCS.m_per_pg_chunks.find(pg_id), HCS.m_per_pg_chunks.end());
+        ASSERT_EQ(HCS.m_per_dev_heap[pg_id]->available_blk_count, 1 + 2 + 3);
+        ASSERT_EQ(HCS.m_per_dev_heap[pg_id]->available_blk_count, HCS.m_per_dev_heap[pg_id]->m_total_blks);
+    }
+    for (const auto& [_, chunk] : HCS.m_chunks) {
+        ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+    }
+}
+
 TEST_F(HeapChunkSelectorTest, test_recovery) {
     HeapChunkSelector HCS_recovery;
     HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 1, 1, 9));
diff --git a/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
index 681782b..5231983 100644
--- a/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
+++ b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp
@@ -66,6 +66,15 @@ TEST_F(HomeObjectFixture, ReplaceMember) {
     auto out_member_id = g_helper->replica_id(num_replicas - 1);
     auto in_member_id = g_helper->replica_id(num_replicas); /*spare replica*/
 
+    // get out_member's index_table_uuid with pg_id
+    string index_table_uuid_str;
+    if (out_member_id == g_helper->my_replica_id()) {
+        auto iter = _obj_inst->_pg_map.find(pg_id);
+        RELEASE_ASSERT(iter != _obj_inst->_pg_map.end(), "PG not found");
+        auto hs_pg = static_cast< homeobject::HSHomeObject::HS_PG* >(iter->second.get());
+        index_table_uuid_str = boost::uuids::to_string(hs_pg->pg_sb_->index_table_uuid);
+    }
+
     run_on_pg_leader(pg_id, [&]() {
         auto r = _obj_inst->pg_manager()
                      ->replace_member(pg_id, out_member_id, PGMember{in_member_id, "new_member", 0})
@@ -90,6 +99,40 @@ TEST_F(HomeObjectFixture, ReplaceMember) {
         verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
         verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
     });
+
+    // step 5: Verify no pg related data in out_member
+    if (out_member_id == g_helper->my_replica_id()) {
+        while (am_i_in_pg(pg_id)) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+            LOGINFO("old member is waiting to leave pg {}", pg_id);
+        }
+
+        verify_pg_destroy(pg_id, index_table_uuid_str, pg_shard_id_vec[pg_id]);
+        // since in this case out_member does not have any pg, we can check each chunk.
+        for (const auto& [_, chunk] : _obj_inst->chunk_selector()->m_chunks) {
+            ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(chunk->available_blks(), chunk->get_total_blks());
+        }
+        LOGINFO("check no pg related data in out member successfully");
+    }
+
+    // Step 6: restart, verify the blobs again on all members, including the new spare replica, and out_member
+    restart();
+    run_if_in_pg(pg_id, [&]() {
+        verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
+        verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
+        LOGINFO("After restart, check pg related data in pg members successfully");
+    });
+
+    if (out_member_id == g_helper->my_replica_id()) {
+        verify_pg_destroy(pg_id, index_table_uuid_str, pg_shard_id_vec[pg_id]);
+        // since in this case out_member does not have any pg, we can check each chunk.
+        for (const auto& [_, chunk] : _obj_inst->chunk_selector()->m_chunks) {
+            ASSERT_EQ(chunk->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(chunk->available_blks(), chunk->get_total_blks());
+        }
+        LOGINFO("After restart, check no pg related data in out member successfully");
+    }
 }
 
 SISL_OPTION_GROUP(