From a4d2f6ae35a44d594e6fe2dfea935313313ca30a Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 18 Sep 2024 22:27:02 +0800 Subject: [PATCH] opt --- .../cloud_engine_calc_delete_bitmap_task.cpp | 8 ++++-- be/src/common/config.cpp | 4 +++ be/src/common/config.h | 4 +++ .../olap/task/engine_publish_version_task.cpp | 26 ++++++++++++------- .../java/org/apache/doris/common/Config.java | 6 ++--- .../org/apache/doris/master/MasterImpl.java | 10 ++++--- 6 files changed, 40 insertions(+), 18 deletions(-) diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 33b9e51c7cbcf28..d22af1c1f39c9e0 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -174,8 +174,12 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { auto sync_rowset_time_us = MonotonicMicros() - t2; max_version = tablet->max_version_unlocked(); if (_version != max_version + 1) { - LOG(WARNING) << "version not continuous, current max version=" << max_version - << ", request_version=" << _version << " tablet_id=" << _tablet_id; + bool need_log = (config::publish_version_gap_logging_threshold < 0 || + max_version + config::publish_version_gap_logging_threshold >= _version); + if (need_log) { + LOG(WARNING) << "version not continuous, current max version=" << max_version + << ", request_version=" << _version << " tablet_id=" << _tablet_id; + } auto error_st = Status::Error("version not continuous"); _engine_calc_delete_bitmap_task->add_error_tablet_id(_tablet_id, error_st); diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 963d770276a2682..9acf6a89006fb1c 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1115,6 +1115,10 @@ DEFINE_mBool(enable_missing_rows_correctness_check, "false"); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. 
DEFINE_mInt32(mow_publish_max_discontinuous_version_num, "20"); +// When the version is not continuous for MOW table in publish phase and the gap between +// current txn's publishing version and the max version of the tablet exceeds this value, +// don't print the warning log. A negative value disables this suppression (always log). +DEFINE_mInt32(publish_version_gap_logging_threshold, "200"); // The secure path with user files, used in the `local` table function. DEFINE_mString(user_files_secure_path, "${DORIS_HOME}"); diff --git a/be/src/common/config.h b/be/src/common/config.h index ce42bd47fc13965..a00391541d4cab1 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1185,6 +1185,10 @@ DECLARE_mBool(enable_missing_rows_correctness_check); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DECLARE_mInt32(mow_publish_max_discontinuous_version_num); +// When the version is not continuous for MOW table in publish phase and the gap between +// current txn's publishing version and the max version of the tablet exceeds this value, +// don't print the warning log. A negative value disables this suppression (always log). +DECLARE_mInt32(publish_version_gap_logging_threshold); // The secure path with user files, used in the `local` table function. 
DECLARE_mString(user_files_secure_path); diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index 09238f570b7567f..dae4c6be814d5ad 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -230,16 +230,22 @@ Status EnginePublishVersionTask::execute() { int64_t missed_version = max_version + 1; int64_t missed_txn_id = _engine.txn_manager()->get_txn_by_tablet_version( tablet->tablet_id(), missed_version); - auto msg = fmt::format( - "uniq key with merge-on-write version not continuous, " - "missed version={}, it's transaction_id={}, current publish " - "version={}, tablet_id={}, transaction_id={}", - missed_version, missed_txn_id, version.second, tablet->tablet_id(), - _publish_version_req.transaction_id); - if (first_time_update) { - LOG(INFO) << msg; - } else { - LOG_EVERY_SECOND(INFO) << msg; + bool need_log = + (config::publish_version_gap_logging_threshold < 0 || + max_version + config::publish_version_gap_logging_threshold >= + version.second); + if (need_log) { + auto msg = fmt::format( + "uniq key with merge-on-write version not continuous, " + "missed version={}, it's transaction_id={}, current publish " + "version={}, tablet_id={}, transaction_id={}", + missed_version, missed_txn_id, version.second, + tablet->tablet_id(), _publish_version_req.transaction_id); + if (first_time_update) { + LOG(INFO) << msg; + } else { + LOG_EVERY_SECOND(INFO) << msg; + } } }; // The versions during the schema change period need to be also continuous diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 1a288b40753fe49..8e0f656c1644291 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -496,9 +496,9 @@ public class Config extends ConfigBase { "print log interval for 
publish transaction failed interval"}) public static long publish_fail_log_interval_second = 5 * 60; - @ConfField(mutable = true, masterOnly = true, description = {"一个 PUBLISH_VERSION 任务失败打印日志次数上限", - "print log interval for publish transaction failed interval"}) - public static long publish_version_task_failed_times_log_threshold = 100; + @ConfField(mutable = true, masterOnly = true, description = {"一个 PUBLISH_VERSION 任务打印失败日志的次数上限", + "the upper limit of failure logs of PUBLISH_VERSION task"}) + public static long publish_version_task_failed_log_threshold = 80; @ConfField(mutable = true, masterOnly = true, description = {"提交事务的最大超时时间,单位是秒。" + "该参数仅用于事务型 insert 操作中。", diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 0005f5a91555085..b427f4b62babb8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -30,6 +30,7 @@ import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; import org.apache.doris.cloud.catalog.CloudTablet; +import org.apache.doris.common.Config; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.load.DeleteJob; import org.apache.doris.load.loadv2.SparkLoadJob; @@ -62,7 +63,6 @@ import org.apache.doris.thrift.TTabletInfo; import org.apache.doris.thrift.TTaskType; -import com.amazonaws.services.glue.model.TaskType; import com.google.common.base.Preconditions; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -131,8 +131,12 @@ public TMasterResult finishTask(TFinishTaskRequest request) { } else { if (taskStatus.getStatusCode() != TStatusCode.OK) { task.failed(); - if (taskType == TTaskType.PUBLISH_VERSION && task.getFailedTimes() < ) { - LOG.warn("finish task reports bad. 
request: {}", request); + if (taskType == TTaskType.PUBLISH_VERSION) { + boolean needLog = (Config.publish_version_task_failed_log_threshold < 0 + || task.getFailedTimes() <= Config.publish_version_task_failed_log_threshold); + if (needLog) { + LOG.warn("finish task reports bad. request: {}", request); + } } String errMsg = "task type: " + taskType + ", status_code: " + taskStatus.getStatusCode().toString() + (taskStatus.isSetErrorMsgs() ? (", status_message: " + taskStatus.getErrorMsgs()) : "")