diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 75a22c84fee09f..0f69d813357492 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -88,8 +88,8 @@ Status CloudCumulativeCompaction::prepare_compact() { // NOTICE: after that, the cumulative point may be larger than max version of this tablet, but it doesn't matter. update_cumulative_point(); if (!config::enable_sleep_between_delete_cumu_compaction) { - st = Status::Error( - "_last_delete_version.first not equal to -1"); + st = Status::Error( + "cumulative compaction meet delete version"); } } return st; @@ -156,7 +156,8 @@ Status CloudCumulativeCompaction::request_global_lock() { LOG_WARNING("failed to prepare cumu compaction") .tag("job_id", _uuid) .tag("msg", resp.status().msg()); - return Status::Error("no suitable versions"); + return Status::Error( + "cumu no suitable versions: job tablet busy"); } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { (static_cast(_tablet.get()))->set_alter_version(resp.alter_version()); std::stringstream ss; @@ -472,7 +473,8 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { }); } if (candidate_rowsets.empty()) { - return Status::Error("no suitable versions"); + return Status::Error( + "no suitable versions: candidate rowsets empty"); } std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator); if (auto st = check_version_continuity(candidate_rowsets); !st.ok()) { @@ -500,12 +502,14 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { &_last_delete_version, &compaction_score); if (_input_rowsets.empty()) { - return Status::Error("no suitable versions"); + return Status::Error( + "no suitable versions: input rowsets empty"); } else if (_input_rowsets.size() == 1 && !_input_rowsets.front()->rowset_meta()->is_segments_overlapping()) { VLOG_DEBUG << "there is only one rowset and not overlapping. 
tablet_id=" << _tablet->tablet_id() << ", version=" << _input_rowsets.front()->version(); - return Status::Error("no suitable versions"); + return Status::Error( + "no suitable versions: only one rowset and not overlapping"); } return Status::OK(); } diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index 7bf12bb486b056..8ad46e0dadbb17 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -739,12 +739,14 @@ Status CloudStorageEngine::_submit_cumulative_compaction_task(const CloudTabletS long now = duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); - if (st.is() && - st.msg() != "_last_delete_version.first not equal to -1") { - // Backoff strategy if no suitable version - tablet->last_cumu_no_suitable_version_ms = now; + if (!st.is()) { + if (st.is()) { + // Backoff strategy if no suitable version + tablet->last_cumu_no_suitable_version_ms = now; + } else { + tablet->set_last_cumu_compaction_failure_time(now); + } } - tablet->set_last_cumu_compaction_failure_time(now); std::lock_guard lock(_compaction_mtx); _tablet_preparing_cumu_compaction.erase(tablet->tablet_id()); return st; @@ -832,10 +834,9 @@ Status CloudStorageEngine::_submit_cumulative_compaction_task(const CloudTabletS if (_should_delay_large_task()) { long now = duration_cast(system_clock::now().time_since_epoch()) .count(); + // sleep 5s for this tablet tablet->set_last_cumu_compaction_failure_time(now); erase_executing_cumu_compaction(); - // sleep 5s for this tablet - tablet->last_cumu_no_suitable_version_ms = now; LOG_WARNING( "failed to do CloudCumulativeCompaction, cumu thread pool is " "intensive, delay large task.") diff --git a/be/src/cloud/cloud_tablet_mgr.cpp b/be/src/cloud/cloud_tablet_mgr.cpp index c9d3696420b89a..baaaaab9111950 100644 --- a/be/src/cloud/cloud_tablet_mgr.cpp +++ b/be/src/cloud/cloud_tablet_mgr.cpp @@ -335,11 +335,13 @@ Status 
CloudTabletMgr::get_topn_tablets_to_compact( auto now = duration_cast(system_clock::now().time_since_epoch()).count(); auto skip = [now, compaction_type](CloudTablet* t) { if (compaction_type == CompactionType::BASE_COMPACTION) { - return now - t->last_base_compaction_success_time_ms < config::base_compaction_freeze_interval_s * 1000; + return now - t->last_base_compaction_success_time_ms < config::base_compaction_freeze_interval_s * 1000 || + now - t->last_base_compaction_failure_time() < config::min_compaction_failure_interval_ms; } // If tablet has too many rowsets but not be compacted for a long time, compaction should be performed // regardless of whether there is a load job recently. - return now - t->last_cumu_no_suitable_version_ms < config::min_compaction_failure_interval_ms || + return now - t->last_cumu_compaction_failure_time() < config::min_compaction_failure_interval_ms || + now - t->last_cumu_no_suitable_version_ms < config::min_compaction_failure_interval_ms || (now - t->last_load_time_ms > config::cu_compaction_freeze_interval_s * 1000 && now - t->last_cumu_compaction_success_time_ms < config::cumu_compaction_interval_s * 1000 && t->fetch_add_approximate_num_rowsets(0) < config::max_tablet_version_num / 2); @@ -485,4 +487,7 @@ void CloudTabletMgr::get_topn_tablet_delete_bitmap_score( << max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]"; } +void CloudTabletMgr::put_tablet_for_UT(std::shared_ptr tablet) { + _tablet_map->put(tablet); +} } // namespace doris diff --git a/be/src/cloud/cloud_tablet_mgr.h b/be/src/cloud/cloud_tablet_mgr.h index 1a6ec72c1f7625..ab56586cd882f7 100644 --- a/be/src/cloud/cloud_tablet_mgr.h +++ b/be/src/cloud/cloud_tablet_mgr.h @@ -87,6 +87,9 @@ class CloudTabletMgr { void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score, uint64_t* max_base_rowset_delete_bitmap_score); + // **ATTN: JUST FOR UT** + void put_tablet_for_UT(std::shared_ptr tablet); + private: CloudStorageEngine& 
_engine; diff --git a/be/src/common/status.h b/be/src/common/status.h index d14239c8a41434..b80d25193d1528 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -248,6 +248,7 @@ namespace ErrorCode { E(CUMULATIVE_MISS_VERSION, -2006, true); \ E(FULL_NO_SUITABLE_VERSION, -2008, false); \ E(FULL_MISS_VERSION, -2009, true); \ + E(CUMULATIVE_MEET_DELETE_VERSION, -2010, false); \ E(META_INVALID_ARGUMENT, -3000, true); \ E(META_OPEN_DB_ERROR, -3001, true); \ E(META_KEY_NOT_FOUND, -3002, false); \ diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 1df669cfe6fc2b..247c4d0c86501c 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -190,8 +190,8 @@ Status CumulativeCompaction::pick_rowsets_to_compact() { .tag("tablet id:", tablet()->tablet_id()) .tag("after cumulative compaction, cumu point:", tablet()->cumulative_layer_point()); - return Status::Error( - "_last_delete_version.first not equal to -1"); + return Status::Error( + "cumulative compaction meet delete version"); } // we did not meet any delete version. which means compaction_score is not enough to do cumulative compaction. diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 437984ccde0cd5..4ff1371010a189 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1761,7 +1761,7 @@ Status Tablet::prepare_compaction_and_calculate_permits( permits = 0; // if we meet a delete version, should increase the cumulative point to let base compaction handle the delete version. // no need to wait 5s. 
- if (!(res.msg() == "_last_delete_version.first not equal to -1") || + if (!res.is() || config::enable_sleep_between_delete_cumu_compaction) { tablet->set_last_cumu_compaction_failure_time(UnixMillis()); } diff --git a/be/test/cloud/cloud_compaction_test.cpp b/be/test/cloud/cloud_compaction_test.cpp new file mode 100644 index 00000000000000..c8db6739084546 --- /dev/null +++ b/be/test/cloud/cloud_compaction_test.cpp @@ -0,0 +1,195 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include +#include + +#include + +#include "cloud/cloud_storage_engine.h" +#include "cloud/cloud_tablet.h" +#include "cloud/cloud_tablet_mgr.h" +#include "gtest/gtest_pred_impl.h" +#include "json2pb/json_to_pb.h" +#include "olap/olap_common.h" +#include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/tablet_meta.h" +#include "util/uid_util.h" + +namespace doris { +class TabletMap; + +class CloudCompactionTest : public testing::Test { + CloudCompactionTest() : _engine(CloudStorageEngine({})) {} + void SetUp() override { + config::compaction_promotion_size_mbytes = 1024; + config::compaction_promotion_ratio = 0.05; + config::compaction_promotion_min_size_mbytes = 64; + config::compaction_min_size_mbytes = 64; + + _tablet_meta.reset(new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, + UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK, + TCompressionType::LZ4F)); + + _json_rowset_meta = R"({ + "rowset_id": 540081, + "tablet_id": 15673, + "txn_id": 4042, + "tablet_schema_hash": 567997577, + "rowset_type": "BETA_ROWSET", + "rowset_state": "VISIBLE", + "start_version": 2, + "end_version": 2, + "num_rows": 3929, + "total_disk_size": 41, + "data_disk_size": 41, + "index_disk_size": 235, + "empty": false, + "load_id": { + "hi": -5350970832824939812, + "lo": -6717994719194512122 + }, + "creation_time": 1553765670, + "num_segments": 3 + })"; + } + void TearDown() override {} + + void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) { + RowsetMetaPB rowset_meta_pb; + json2pb::JsonToProtoMessage(_json_rowset_meta, &rowset_meta_pb); + rowset_meta_pb.set_start_version(start); + rowset_meta_pb.set_end_version(end); + rowset_meta_pb.set_creation_time(10000); + + pb1->init_from_pb(rowset_meta_pb); + pb1->set_total_disk_size(41); + pb1->set_tablet_schema(_tablet_meta->tablet_schema()); + } + + void init_rs_meta_small_base(std::vector* rs_metas) { + RowsetMetaSharedPtr ptr1(new 
RowsetMeta()); + init_rs_meta(ptr1, 0, 0); + rs_metas->push_back(ptr1); + + RowsetMetaSharedPtr ptr2(new RowsetMeta()); + init_rs_meta(ptr2, 1, 1); + rs_metas->push_back(ptr2); + + RowsetMetaSharedPtr ptr3(new RowsetMeta()); + init_rs_meta(ptr3, 2, 2); + rs_metas->push_back(ptr3); + + RowsetMetaSharedPtr ptr4(new RowsetMeta()); + init_rs_meta(ptr4, 3, 3); + rs_metas->push_back(ptr4); + + RowsetMetaSharedPtr ptr5(new RowsetMeta()); + init_rs_meta(ptr5, 4, 4); + rs_metas->push_back(ptr5); + } + +protected: + std::string _json_rowset_meta; + TabletMetaSharedPtr _tablet_meta; + +public: + CloudStorageEngine _engine; +}; + +TEST_F(CloudCompactionTest, failure_base_compaction_tablet_sleep_test) { + auto filter_out = [](CloudTablet* t) { return false; }; + CloudTabletMgr mgr(_engine); + + std::vector rs_metas; + init_rs_meta_small_base(&rs_metas); + + CloudTabletSPtr tablet1 = std::make_shared(_engine, _tablet_meta); + for (auto& rs_meta : rs_metas) { + static_cast(_tablet_meta->add_rs_meta(rs_meta)); + } + tablet1->tablet_meta()->_tablet_id = 10000; + tablet1->set_last_base_compaction_failure_time( + duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() - + 100000); + tablet1->set_last_base_compaction_failure_time(0); + tablet1->tablet_meta()->tablet_schema()->set_disable_auto_compaction(false); + tablet1->_approximate_num_rowsets = 10; + mgr.put_tablet_for_UT(tablet1); + + int64_t max_score; + std::vector> tablets {}; + Status st = mgr.get_topn_tablets_to_compact(1, CompactionType::BASE_COMPACTION, filter_out, + &tablets, &max_score); + ASSERT_EQ(st, Status::OK()); + ASSERT_EQ(tablets.size(), 1); + + tablet1->set_last_base_compaction_failure_time( + duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + st = mgr.get_topn_tablets_to_compact(1, CompactionType::BASE_COMPACTION, filter_out, &tablets, + &max_score); + ASSERT_EQ(st, Status::OK()); + ASSERT_EQ(tablets.size(), 0); +} + +TEST_F(CloudCompactionTest, 
failure_cumu_compaction_tablet_sleep_test) { + auto filter_out = [](CloudTablet* t) { return false; }; + CloudTabletMgr mgr(_engine); + + std::vector rs_metas; + init_rs_meta_small_base(&rs_metas); + + CloudTabletSPtr tablet1 = std::make_shared(_engine, _tablet_meta); + for (auto& rs_meta : rs_metas) { + static_cast(_tablet_meta->add_rs_meta(rs_meta)); + } + tablet1->tablet_meta()->_tablet_id = 10000; + tablet1->set_last_cumu_compaction_failure_time( + duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() - + 100000); + tablet1->set_last_cumu_compaction_failure_time(0); + tablet1->tablet_meta()->tablet_schema()->set_disable_auto_compaction(false); + tablet1->_approximate_cumu_num_deltas = 10; + mgr.put_tablet_for_UT(tablet1); + + int64_t max_score; + std::vector> tablets {}; + Status st = mgr.get_topn_tablets_to_compact(1, CompactionType::CUMULATIVE_COMPACTION, + filter_out, &tablets, &max_score); + ASSERT_EQ(st, Status::OK()); + ASSERT_EQ(tablets.size(), 1); + + tablet1->set_last_cumu_compaction_failure_time( + duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + st = mgr.get_topn_tablets_to_compact(1, CompactionType::CUMULATIVE_COMPACTION, filter_out, &tablets, + &max_score); + ASSERT_EQ(st, Status::OK()); + ASSERT_EQ(tablets.size(), 0); +} +} // namespace doris diff --git a/regression-test/plugins/plugin_compaction.groovy b/regression-test/plugins/plugin_compaction.groovy index 45dd99a97a3db6..b187fe4b178c41 100644 --- a/regression-test/plugins/plugin_compaction.groovy +++ b/regression-test/plugins/plugin_compaction.groovy @@ -106,7 +106,7 @@ Suite.metaClass.trigger_and_wait_compaction = { String table_name, String compac triggered_tablets.add(tablet) // compaction already in queue, treat it as successfully triggered } else if (!auto_compaction_disabled) { // ignore the error if auto compaction enabled - } else if (status_lower.contains("e-2000")) { + } else if (status_lower.contains("e-2000") || 
status_lower.contains("e-2010")) { // ignore this tablet compaction. } else if (ignored_errors.any { error -> status_lower.contains(error.toLowerCase()) }) { // ignore this tablet compaction if the error is in the ignored_errors list