From e1ebae5d20d40a5fdbe151e3ca730210fe9b86a5 Mon Sep 17 00:00:00 2001 From: Uniqueyou Date: Thu, 7 Aug 2025 23:28:33 +0800 Subject: [PATCH] [fix](checker) Fix inverted check and test for checker (#54403) related #53876 test: ``` [ RUN ] CheckerTest.normal_check_index_file_v1 [ RUN ] CheckerTest.normal_inverted_check_index_file_v1 [ RUN ] CheckerTest.normal_check_index_file_v2 [ RUN ] CheckerTest.normal_inverted_check_index_file_v2 [ RUN ] CheckerTest.abnormal_check_index_file_v1 (loss key) [ RUN ] CheckerTest.abnormal_inverted_check_index_file_v1 (loss file) [ RUN ] CheckerTest.abnormal_inverted_check_index_file_v2 (loss file) [ RUN ] CheckerTest.abnormal_check_index_file_v2 (loss key) ``` --- cloud/src/recycler/checker.cpp | 43 +++- cloud/test/recycler_test.cpp | 422 ++++++++++++++++++++------------- 2 files changed, 294 insertions(+), 171 deletions(-) diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp index a400e797e03f77..96b17872386df9 100644 --- a/cloud/src/recycler/checker.cpp +++ b/cloud/src/recycler/checker.cpp @@ -576,7 +576,7 @@ int InstanceChecker::do_check() { TabletIndexPB tablet_index; if (get_tablet_idx(txn_kv_.get(), instance_id_, rs_meta.tablet_id(), tablet_index) == -1) { - LOG(WARNING) << "failedt to get tablet index, tablet_id= " << rs_meta.tablet_id(); + LOG(WARNING) << "failed to get tablet index, tablet_id= " << rs_meta.tablet_id(); return; } @@ -605,8 +605,8 @@ int InstanceChecker::do_check() { InvertedIndexStorageFormatPB::V1) { for (const auto& index_id : index_ids) { LOG(INFO) << "check inverted index, tablet_id=" << rs_meta.tablet_id() - << " rowset_id=" << rs_meta.rowset_id_v2() - << " segment_index=" << i << " index_id=" << index_id.first + << " rowset_id=" << rs_meta.rowset_id_v2() << " segment_id=" << i + << " index_id=" << index_id.first << " index_suffix_name=" << index_id.second; index_path_v.emplace_back( inverted_index_path_v1(rs_meta.tablet_id(), rs_meta.rowset_id_v2(), @@ -619,14 +619,17 @@ int InstanceChecker::do_check() { if (!index_path_v.empty()) { if (std::ranges::all_of(index_path_v, [&](const auto& idx_file_path) { - return tablet_files_cache.files.contains(idx_file_path); + if (!tablet_files_cache.files.contains(idx_file_path)) { + LOG(INFO) << "loss index file: " << idx_file_path; + return false; + } + return true; })) { continue; } } index_file_loss = true; data_loss = true; - LOG(WARNING) << "object not exist, key=" << hex(tablet_idx_key); } } }; @@ -735,6 +738,10 @@ int InstanceChecker::do_inverted_check() { butil::SplitString(obj_key, '/', &str); // data/{tablet_id}/{rowset_id}_{seg_num}.dat if (str.size() < 3) { + // clang-format off + LOG(WARNING) << "split obj_key error, str.size() should be less than 3," + << " value = " << str.size(); + // clang-format on return -1; } @@ -744,6 +751,11 @@ int InstanceChecker::do_inverted_check() { return -1; } + if (!str[2].ends_with(".dat")) { + // skip check not segment file + return 0; + } + std::string rowset_id; if (auto pos = str.back().find('_'); pos != std::string::npos) { rowset_id = str.back().substr(0, pos); @@ -813,6 +825,10 @@ int InstanceChecker::do_inverted_check() { // format v1: data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx // format v2: data/{tablet_id}/{rowset_id}_{seg_num}.idx if (str.size() < 3) { + // clang-format off + LOG(WARNING) << "split obj_key error, str.size() should be less than 3," + << " value = " << str.size(); + // clang-format on return -1; } @@ -1281,8 +1297,11 @@ int InstanceChecker::check_inverted_index_file_storage_format_v1( for (const auto& i : rs_meta.tablet_schema().index()) { if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { + LOG(INFO) << fmt::format( + "record index info, index_id: {}, index_suffix_name: {}", i.index_id(), + i.index_suffix_name()); rowset_index_cache_v1.index_ids.insert( - fmt::format("{}{}", i.index_name(), i.index_suffix_name())); + fmt::format("{}{}", i.index_id(), i.index_suffix_name())); } } @@ -1296,13 +1315,21 @@ int InstanceChecker::check_inverted_index_file_storage_format_v1( if (!rowset_index_cache_v1.segment_ids.contains(segment_id)) { // Garbage data leak - LOG(WARNING) << "rowset should be recycled, key=" << file_path; + // clang-format off + LOG(WARNING) << "rowset_index_cache_v1.segment_ids don't contains segment_id, rowset should be recycled," + << " key = " << file_path + << " segment_id = " << segment_id; + // clang-format on return 1; } if (!rowset_index_cache_v1.index_ids.contains(index_id_with_suffix_name)) { // Garbage data leak - LOG(WARNING) << "rowset with inde meta should be recycled, key=" << file_path; + // clang-format off + LOG(WARNING) << "rowset_index_cache_v1.index_ids don't contains index_id_with_suffix_name," + << " rowset with inde meta should be recycled, key=" << file_path + << " index_id_with_suffix_name=" << index_id_with_suffix_name; + // clang-format on return 1; } diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index 57a075eb5b6d27..0411e801905920 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -75,78 +75,50 @@ std::vector index_v2_file_path = { "data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.idx", "data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.idx", "data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.idx", - "data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.idx", + "data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_0.idx", "data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.idx", "data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.idx", "data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.idx", "data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.idx"}; -std::vector segment_v2_file_path = { - "data/1753202639945/0200000000001a5c92f4e7d9j8f2b4c8a3e6f8b1c9d2e5f8_0.dat", - "data/1753202639947/0200000000001b8d45a74r6c7sf3e9c2b6d4a8e1f7c3d9e2_0.dat", - "data/1753202639951/0200000000001c9e56b8g4f0x8s7g2f0d3c7e5b9f2e8d4f0_0.dat", - "data/1753202639953/0200000000001d0f67c9h5g8a3e6f8b1e4d8f6c0g3f9e5g1_0.dat", - "data/1753202639955/0200000000001e1g78d067c9h5g8i6h2f5e9g7d1h4g0f6h2_0.dat", - "data/1753202639957/0200000000001f2h89e1jg7d1h4g07i3g6f0h8e2i5h1g7i3_0.dat", - "data/1753202639959/020000000000208i90f2k0h8e2i5h8j4h7g1i9f3j6i2h8j4_0.dat", - "data/1753202639961/02000000000021aj01g3l9k5i8h2j8e2i5h8j0g4k7j3i9k5_0.dat", - "data/1753202639963/02000000000022bk12h4m0lk0h8e2i56j9i3k1h5l8k4j0l6_0.dat", - "data/1753202639965/02000000000023cl23i5n1m7g3l9k5i8k0j4l2i6m9l5k1m7_0.dat", - "data/1753202639967/02000000000024dm34j1m7g3l9k6o2n8l1k5m3j7n0m6l2n8_0.dat", - "data/1753202639969/02000000000025en45k7p3o9m2l6n4k34j1m7g38o1n7m3o9_0.dat", - "data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.dat", - "data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.dat", - "data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.dat", - "data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.dat", - "data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.dat", - "data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.dat", - "data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.dat", - "data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.dat"}; - // clang-format off std::vector index_v1_file_path = { "data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0_1753202846943.idx", "data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0_1753202841593.idx", - "data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0_1753202846943.idx", - "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx", - "data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx", + "data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0_1753202846923.idx", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0_1753202846963.idx", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0_1753202846903.idx", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_1_1753202846903.idx", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_1_1753202846963.idx", "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202844931.idx", - "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202846410.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753222846410.idx", "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202847011.idx", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202844931.idx", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202846410.idx", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202847011.idx", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202858543.idx", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202844931.idx", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202846410.idx", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202847011.idx", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202858543.idx", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202844931.idx", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202846410.idx", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202847011.idx"}; + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_1_1753202844931.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_1_1753222846410.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_1_1753202847011.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_2_1753202844931.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_2_1753222846410.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_2_1753202847011.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202843931.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753252846410.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202847021.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_1_1753202843931.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_1_1753252846410.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_1_1753202847021.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_2_1753202843931.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_2_1753252846410.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_2_1753202847021.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202824931.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1756202846410.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202847071.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_1_1753202824931.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_1_1756202846410.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_1_1753202847071.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_2_1753202824931.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_2_1756202846410.idx", + "data/1753202858458/0200000000007afc994f6aa97288842758c2e89b03e65682_2_1753202847071.idx"}; // clang-format on -std::vector segment_v1_file_path = { - "data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0.dat", - "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat", - "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat"}; - doris::cloud::RecyclerThreadPoolGroup thread_group; int main(int argc, char** argv) { @@ -452,9 +424,10 @@ static int create_committed_rowset_by_real_index_v2_file(TxnKv* txn_kv, std::string segment_str = filename.substr(underscore_pos + 1, dot_pos - underscore_pos - 1); std::string extension = filename.substr(dot_pos + 1); - int segment_id = stoll(segment_str); - int64_t tablet_index_id = 123; // Default index id - int64_t schema_version = 456; // Default schema version + int64_t segment_id = stoll(segment_str); + int64_t tablet_index_id = tablet_id + 10; + // take the last 4 digits of tablet_id as the unique identifier + int64_t schema_version = std::atoll(path_parts[1].substr(path_parts[1].size() - 4).c_str()); // Create rowset meta data MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; @@ -494,26 +467,39 @@ static int create_committed_rowset_by_real_index_v2_file(TxnKv* txn_kv, // Create tablet schema if dealing with index files if (extension == "idx") { - doris::TabletSchemaCloudPB tablet_schema; - tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); - tablet_schema.set_schema_version(schema_version); - - auto index = tablet_schema.add_index(); - index->set_index_id(0); - index->set_index_type(IndexType::INVERTED); - std::string tablet_schema_key = meta_schema_key({instance_id, tablet_index_id, schema_version}); - tablet_schema.SerializeToString(&val); + std::string tablet_schema_val; if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { return -1; } + doris::TabletSchemaCloudPB tablet_schema; + + if (txn->get(tablet_schema_key, &tablet_schema_val) == TxnErrorCode::TXN_KEY_NOT_FOUND) { + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + tablet_schema.set_schema_version(schema_version); + + auto index = tablet_schema.add_index(); + index->set_index_id(tablet_schema.index().size()); + index->set_index_type(IndexType::INVERTED); + + } else { + tablet_schema.ParseFromString(tablet_schema_val); + + auto index = tablet_schema.add_index(); + index->set_index_id(tablet_schema.index().size()); + index->set_index_type(IndexType::INVERTED); + } + tablet_schema.SerializeToString(&val); + txn->put(tablet_schema_key, val); if (txn->commit() != TxnErrorCode::TXN_OK) { return -1; } } + std::string segment_path = file_path.substr(0, file_path.size() - 4) + ".dat"; + accessor->put_file(segment_path, ""); accessor->put_file(file_path, ""); return 0; @@ -523,7 +509,7 @@ static int create_committed_rowset_by_real_index_v1_file(TxnKv* txn_kv, StorageVaultAccessor* accessor, const std::string& resource_id, const std::string& file_path, - int64_t version = 1) { + size_t& version) { std::string val; std::unique_ptr txn; @@ -567,8 +553,8 @@ static int create_committed_rowset_by_real_index_v1_file(TxnKv* txn_kv, int segment_id = stoll(segment_str); int64_t index_id = std::stoll(index_id_str); - int64_t tablet_index_id = 123; // Default tablet index id - int64_t schema_version = 456; // Default schema version + int64_t tablet_index_id = tablet_id + 10; + int64_t schema_version = std::atoll(path_parts[1].substr(path_parts[1].size() - 4).c_str()); // Create rowset meta data MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; @@ -608,29 +594,45 @@ static int create_committed_rowset_by_real_index_v1_file(TxnKv* txn_kv, // Create tablet schema if dealing with index files if (extension == "idx") { - doris::TabletSchemaCloudPB tablet_schema; - tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); - tablet_schema.set_schema_version(schema_version); - - auto index = tablet_schema.add_index(); - index->set_index_id(index_id); - index->set_index_type(IndexType::INVERTED); - if (!index_suffix.empty()) { - index->set_index_suffix_name(index_suffix); - } - std::string tablet_schema_key = meta_schema_key({instance_id, tablet_index_id, schema_version}); - tablet_schema.SerializeToString(&val); + std::string tablet_schema_val; if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { return -1; } + doris::TabletSchemaCloudPB tablet_schema; + + if (txn->get(tablet_schema_key, &tablet_schema_val) == TxnErrorCode::TXN_KEY_NOT_FOUND) { + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); + tablet_schema.set_schema_version(schema_version); + + auto index = tablet_schema.add_index(); + index->set_index_id(index_id); + index->set_index_type(IndexType::INVERTED); + if (!index_suffix.empty()) { + index->set_index_suffix_name(index_suffix); + } + + } else { + tablet_schema.ParseFromString(tablet_schema_val); + + auto* index = tablet_schema.add_index(); + index->set_index_id(index_id); + index->set_index_type(IndexType::INVERTED); + if (!index_suffix.empty()) { + index->set_index_suffix_name(index_suffix); + } + } + tablet_schema.SerializeToString(&val); + txn->put(tablet_schema_key, val); if (txn->commit() != TxnErrorCode::TXN_OK) { return -1; } } + std::string segment_path = fmt::format("data/{}/{}_{}.dat", tablet_id, rowset_id, segment_id); + accessor->put_file(segment_path, ""); accessor->put_file(file_path, ""); return 0; @@ -2896,7 +2898,7 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) { ASSERT_NE(checker.do_inverted_check(), 0); } -TEST(CheckerTest, normal_check_index_file) { +TEST(CheckerTest, normal_check_index_file_v1) { auto txn_kv = std::make_shared(); ASSERT_EQ(txn_kv->init(), 0); @@ -2925,17 +2927,16 @@ TEST(CheckerTest, normal_check_index_file) { // Add some visible rowsets along with some rowsets that should be recycled // call inverted check after do recycle which would sweep all the rowsets not visible auto accessor = checker.accessor_map_.begin()->second; - for (const auto& file : index_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + size_t version = 0; + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file, + version); } - for (const auto& file : segment_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); - } - ASSERT_EQ(checker.do_inverted_check(), 0); + ASSERT_EQ(checker.do_check(), 0); } -TEST(CheckerTest, normal_inverted_check_index_file) { +TEST(CheckerTest, normal_inverted_check_index_file_v1) { auto txn_kv = std::make_shared(); ASSERT_EQ(txn_kv->init(), 0); @@ -2964,17 +2965,16 @@ TEST(CheckerTest, normal_inverted_check_index_file) { // Add some visible rowsets along with some rowsets that should be recycled // call inverted check after do recycle which would sweep all the rowsets not visible auto accessor = checker.accessor_map_.begin()->second; - for (const auto& file : index_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + size_t version = 0; + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file, + version); } - for (const auto& file : segment_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); - } ASSERT_EQ(checker.do_inverted_check(), 0); } -TEST(CheckerTest, inverted_check_recycle_idx_file_v1) { +TEST(CheckerTest, normal_check_index_file_v2) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -3013,55 +3013,72 @@ TEST(CheckerTest, inverted_check_recycle_idx_file_v1) { }); sp->enable_processing(); - for (const auto& file : index_v1_file_path) { - create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + size_t version = 1; + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file, + version++); } - for (const auto& file : segment_v1_file_path) { - create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); - } + std::unique_ptr list_iter; + int ret = accessor->list_directory("data", &list_iter); + ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; - size_t delete_kv_num = 5; - std::string meta_rowset_key_begin, meta_rowset_key_end; - meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin); - meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end); - std::vector rowset_key_to_delete; - std::unique_ptr txn; - TxnErrorCode err = txn_kv->create_txn(&txn); - DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + ASSERT_EQ(checker.do_check(), 0); +} - std::unique_ptr it; - do { - err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it); - while (it->has_next()) { - auto [k, v] = it->next(); - if (rowset_key_to_delete.size() < delete_kv_num) { - rowset_key_to_delete.emplace_back(k); - } - if (!it->has_next()) { - meta_rowset_key_begin = k; - } - } - meta_rowset_key_begin.push_back('\x00'); - } while (it->more()); +TEST(CheckerTest, normal_inverted_check_index_file_v2) { + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer((int*)0x01, [&sp](int*) { + sp->clear_all_call_backs(); + sp->disable_processing(); + }); - for (const auto& key : rowset_key_to_delete) { - std::unique_ptr txn; - TxnErrorCode err = txn_kv->create_txn(&txn); - DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; - txn->remove(key); - err = txn->commit(); - DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + obj_info->set_ak(config::test_s3_ak); + obj_info->set_sk(config::test_s3_sk); + obj_info->set_endpoint(config::test_s3_endpoint); + obj_info->set_region(config::test_s3_region); + obj_info->set_bucket(config::test_s3_bucket); + obj_info->set_prefix("CheckerTest"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + + sp->set_call_back( + "InstanceRecycler::init_storage_vault_accessors.mock_vault", [&accessor](auto&& args) { + auto* map = try_any_cast< + std::unordered_map>*>( + args[0]); + auto* vault = try_any_cast(args[1]); + if (vault->name() == "test_success_hdfs_vault") { + map->emplace(vault->id(), accessor); + } + }); + sp->enable_processing(); + + size_t version = 1; + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file, + version++); } std::unique_ptr list_iter; int ret = accessor->list_directory("data", &list_iter); ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; - ASSERT_EQ(checker.do_inverted_check(), 1); + ASSERT_EQ(checker.do_inverted_check(), 0); } -TEST(CheckerTest, inverted_check_recycle_idx_file_v2) { +TEST(CheckerTest, abnormal_check_index_file_v1) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -3099,13 +3116,73 @@ TEST(CheckerTest, inverted_check_recycle_idx_file_v2) { } }); sp->enable_processing(); + size_t version = 0; + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file, + version); + } - for (const auto& file : index_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + std::unique_ptr list_iter; + int ret = accessor->list_directory("data", &list_iter); + ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; + + int64_t tablet_to_delete = -1; + for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) { + std::vector str; + butil::SplitString(file->path, '/', &str); + int64_t tablet_id = atol(str[1].c_str()); + + // delete all index files of ever tablet for mock missing + if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) { + tablet_to_delete = tablet_id; + accessor->delete_file(file->path); + } } + ASSERT_EQ(checker.do_check(), 1); +} + +TEST(CheckerTest, abnormal_inverted_check_index_file_v1) { + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer((int*)0x01, [&sp](int*) { + sp->clear_all_call_backs(); + sp->disable_processing(); + }); - for (const auto& file : segment_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + obj_info->set_ak(config::test_s3_ak); + obj_info->set_sk(config::test_s3_sk); + obj_info->set_endpoint(config::test_s3_endpoint); + obj_info->set_region(config::test_s3_region); + obj_info->set_bucket(config::test_s3_bucket); + obj_info->set_prefix("CheckerTest"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + + sp->set_call_back( + "InstanceRecycler::init_storage_vault_accessors.mock_vault", [&accessor](auto&& args) { + auto* map = try_any_cast< + std::unordered_map>*>( + args[0]); + auto* vault = try_any_cast(args[1]); + if (vault->name() == "test_success_hdfs_vault") { + map->emplace(vault->id(), accessor); + } + }); + sp->enable_processing(); + size_t version = 0; + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file, + version); } size_t delete_kv_num = 5; @@ -3148,7 +3225,7 @@ TEST(CheckerTest, inverted_check_recycle_idx_file_v2) { ASSERT_EQ(checker.do_inverted_check(), 1); } -TEST(CheckerTest, forward_check_recycle_idx_file_v1) { +TEST(CheckerTest, abnormal_inverted_check_index_file_v2) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -3187,33 +3264,53 @@ TEST(CheckerTest, forward_check_recycle_idx_file_v1) { }); sp->enable_processing(); - for (const auto& file : index_v1_file_path) { - create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + size_t version = 1; + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file, + version++); } - for (const auto& file : segment_v1_file_path) { - create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + size_t delete_kv_num = 5; + std::string meta_rowset_key_begin, meta_rowset_key_end; + meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin); + meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end); + std::vector rowset_key_to_delete; + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + + std::unique_ptr it; + do { + err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it); + while (it->has_next()) { + auto [k, v] = it->next(); + if (rowset_key_to_delete.size() < delete_kv_num) { + rowset_key_to_delete.emplace_back(k); + } + if (!it->has_next()) { + meta_rowset_key_begin = k; + } + } + meta_rowset_key_begin.push_back('\x00'); + } while (it->more()); + + for (const auto& key : rowset_key_to_delete) { + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + txn->remove(key); + err = txn->commit(); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; } + std::unique_ptr list_iter; int ret = accessor->list_directory("data", &list_iter); ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; - int64_t tablet_to_delete = -1; - for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) { - std::vector str; - butil::SplitString(file->path, '/', &str); - int64_t tablet_id = atol(str[1].c_str()); - - // delete all index files of ever tablet for mock missing - if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) { - tablet_to_delete = tablet_id; - accessor->delete_file(file->path); - } - } - ASSERT_EQ(checker.do_check(), 1); + ASSERT_EQ(checker.do_inverted_check(), 1); } -TEST(CheckerTest, forward_check_recycle_idx_file_v2) { +TEST(CheckerTest, abnormal_check_index_file_v2) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -3252,13 +3349,12 @@ TEST(CheckerTest, forward_check_recycle_idx_file_v2) { }); sp->enable_processing(); + size_t version = 1; for (const auto& file : index_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file, + version++); } - for (const auto& file : segment_v2_file_path) { - create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); - } std::unique_ptr list_iter; int ret = accessor->list_directory("data", &list_iter); ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;