From 0446b9945733ab1450936ba2d08fc92b7a79444a Mon Sep 17 00:00:00 2001 From: lambxu Date: Tue, 29 Apr 2025 14:33:18 +0800 Subject: [PATCH 1/4] fix-safe-drop --- cloud/src/common/config.h | 1 + cloud/src/meta-service/meta_service_resource.cpp | 4 +++- cloud/src/resource-manager/resource_manager.cpp | 6 +++--- cloud/src/resource-manager/resource_manager.h | 4 +++- gensrc/proto/cloud.proto | 2 ++ 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 360b83568b51d8..837d20b9a44051 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -94,6 +94,7 @@ CONF_mInt64(delete_bitmap_storage_optimize_check_version_gap, "1000"); CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min // interval for check object CONF_mInt32(check_object_interval_seconds, "43200"); // 12hours +CONF_mInt32(sql_node_safe_drop_time_seconds, "300");// 5min CONF_mInt64(check_recycle_task_interval_seconds, "600"); // 10min CONF_mInt64(recycler_sleep_before_scheduling_seconds, "60"); diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index eb88694ff5776f..a88e1db8310dc3 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -2072,7 +2072,9 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, msg = r.second; } break; case AlterClusterRequest::DROP_CLUSTER: { - auto r = resource_mgr_->drop_cluster(instance_id, cluster); + bool safe_drop_on_sql_cluster = request->has_safe_drop_on_sql_cluster() + ? request->safe_drop_on_sql_cluster() : true; + auto r = resource_mgr_->drop_cluster(instance_id, cluster, safe_drop_on_sql_cluster); code = r.first; msg = r.second; } break; diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index cd0381b7189aa5..6ec89197b0ed8a 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -461,7 +461,7 @@ std::pair ResourceManager::add_cluster(const std:: static bool is_sql_node_exceeded_safe_drop_time(const NodeInfoPB& node) { int64_t ctime = node.ctime(); // protect time 5mins - int64_t exceed_time = 5 * 60; + int64_t exceed_time = config::sql_node_safe_drop_time_seconds; TEST_SYNC_POINT_CALLBACK("resource_manager::set_safe_drop_time", &exceed_time); exceed_time = ctime + exceed_time; auto now_time = std::chrono::system_clock::now(); @@ -471,7 +471,7 @@ static bool is_sql_node_exceeded_safe_drop_time(const NodeInfoPB& node) { } std::pair ResourceManager::drop_cluster( - const std::string& instance_id, const ClusterInfo& cluster) { + const std::string& instance_id, const ClusterInfo& cluster, bool safe_drop_on_sql_cluster) { std::stringstream ss; std::string msg; @@ -521,7 +521,7 @@ std::pair ResourceManager::drop_cluster( for (auto& i : instance.clusters()) { ++idx; if (i.cluster_id() == cluster.cluster.cluster_id()) { - if (i.type() == ClusterPB::SQL) { + if (i.type() == ClusterPB::SQL && safe_drop_on_sql_cluster) { for (auto& fe_node : i.nodes()) { // check drop fe cluster if (!is_sql_node_exceeded_safe_drop_time(fe_node)) { diff --git a/cloud/src/resource-manager/resource_manager.h b/cloud/src/resource-manager/resource_manager.h index 7d411e9059f6dc..60d735b9e61d97 100644 --- a/cloud/src/resource-manager/resource_manager.h +++ b/cloud/src/resource-manager/resource_manager.h @@ -78,10 +78,12 @@ class ResourceManager { * Drops a cluster * * @param cluster cluster to drop, only cluster name and cluster id are concered + * @param safe_drop if true, check if cluster is drop in safe time * @return empty string for success, otherwise failure reason returned */ virtual std::pair drop_cluster(const std::string& instance_id, - const ClusterInfo& cluster); + const ClusterInfo& cluster, + bool safe_drop_on_sql_cluster = true); /** * Update a cluster diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 8c07dd83299172..59557e83fecc89 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1119,6 +1119,8 @@ message AlterClusterRequest { optional Operation op = 4; // for SQL mode rename cluster, rename to cluster name eq instance empty cluster name, need drop empty cluster optional bool replace_if_existing_empty_target_cluster = 5; + // if true, check if drop_cluster op on sql type cluster is in safe time + optional bool safe_drop_on_sql_cluster = 6 [default = true]; } message AlterClusterResponse { From bc7138b3e60621da38d4382e3de8b4cf1387f19f Mon Sep 17 00:00:00 2001 From: lambxu Date: Tue, 29 Apr 2025 14:58:36 +0800 Subject: [PATCH 2/4] format --- cloud/src/common/config.h | 2 +- cloud/src/meta-service/meta_service_resource.cpp | 3 ++- cloud/src/resource-manager/resource_manager.h | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 837d20b9a44051..7f1da8803ec5f5 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -94,7 +94,7 @@ CONF_mInt64(delete_bitmap_storage_optimize_check_version_gap, "1000"); CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min // interval for check object CONF_mInt32(check_object_interval_seconds, "43200"); // 12hours -CONF_mInt32(sql_node_safe_drop_time_seconds, "300");// 5min +CONF_mInt32(sql_node_safe_drop_time_seconds, "300"); // 5min CONF_mInt64(check_recycle_task_interval_seconds, "600"); // 10min CONF_mInt64(recycler_sleep_before_scheduling_seconds, "60"); diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index a88e1db8310dc3..6e6183c22b26b4 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -2073,7 +2073,8 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, } break; case AlterClusterRequest::DROP_CLUSTER: { bool safe_drop_on_sql_cluster = request->has_safe_drop_on_sql_cluster() - ? request->safe_drop_on_sql_cluster() : true; + ? request->safe_drop_on_sql_cluster() + : true; auto r = resource_mgr_->drop_cluster(instance_id, cluster, safe_drop_on_sql_cluster); code = r.first; msg = r.second; diff --git a/cloud/src/resource-manager/resource_manager.h b/cloud/src/resource-manager/resource_manager.h index 60d735b9e61d97..57a5db03bfc331 100644 --- a/cloud/src/resource-manager/resource_manager.h +++ b/cloud/src/resource-manager/resource_manager.h @@ -78,12 +78,12 @@ class ResourceManager { * Drops a cluster * * @param cluster cluster to drop, only cluster name and cluster id are concered - * @param safe_drop if true, check if cluster is drop in safe time + * @param safe_drop_on_sql_cluster if true, check if sql-cluster is drop in safe time * @return empty string for success, otherwise failure reason returned */ - virtual std::pair drop_cluster(const std::string& instance_id, - const ClusterInfo& cluster, - bool safe_drop_on_sql_cluster = true); + virtual std::pair drop_cluster( + const std::string& instance_id, const ClusterInfo& cluster, + bool safe_drop_on_sql_cluster = true); /** * Update a cluster From 264f6a6cb82a5993516198e304dfee13e36fefba Mon Sep 17 00:00:00 2001 From: lambxu Date: Tue, 29 Apr 2025 15:08:58 +0800 Subject: [PATCH 3/4] not use default value --- gensrc/proto/cloud.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 59557e83fecc89..9c508579754060 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1120,7 +1120,7 @@ message AlterClusterRequest { // for SQL mode rename cluster, rename to cluster name eq instance empty cluster name, need drop empty cluster optional bool replace_if_existing_empty_target_cluster = 5; // if true, check if drop_cluster op on sql type cluster is in safe time - optional bool safe_drop_on_sql_cluster = 6 [default = true]; + optional bool safe_drop_on_sql_cluster = 6; } message AlterClusterResponse { From 98259ef1b4a54a130ab371661907ff5d6ac8e876 Mon Sep 17 00:00:00 2001 From: lambxu Date: Thu, 8 May 2025 02:06:53 +0800 Subject: [PATCH 4/4] fix-cloud-ut --- cloud/test/mock_resource_manager.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cloud/test/mock_resource_manager.h b/cloud/test/mock_resource_manager.h index 25b0d5fbb4bc71..aa2a19f886d1b2 100644 --- a/cloud/test/mock_resource_manager.h +++ b/cloud/test/mock_resource_manager.h @@ -46,8 +46,9 @@ class MockResourceManager : public ResourceManager { return std::make_pair(MetaServiceCode::OK, ""); } - std::pair drop_cluster(const std::string& instance_id, - const ClusterInfo& cluster) override { + std::pair drop_cluster( + const std::string& instance_id, const ClusterInfo& cluster, + bool safe_drop_on_sql_cluster = true) override { return std::make_pair(MetaServiceCode::OK, ""); } @@ -74,4 +75,4 @@ class MockResourceManager : public ResourceManager { const std::vector& to_del) override { return ""; } -}; \ No newline at end of file +};