CA-394109: Reduce number of alerts
Currently there are two ways of raising an alert when a cluster host
joins or leaves the cluster: 1. through an API call such as
cluster-host-leave; 2. through the cluster watcher, which checks for
updates from `corosync-notifyd`. As a result, every such alert is
raised twice.

This patch favours the second approach, as it also accounts for an
"unclean" leave of a cluster host. Moreover, a "clean" leave triggered
through the API causes a change in `corosync-notifyd` which the watcher
detects anyway, so dropping the API-side alerts solves the
double-alerting problem.
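For context, the surviving watcher path works roughly as follows: each time `corosync-notifyd` reports a membership update, the watcher diffs the set of live hosts against its previous snapshot and raises at most one alert. The sketch below is illustrative only, not the actual watcher code: it assumes a hypothetical blocking `poll_update` callback and plain host values, and of the names used only `maybe_generate_alert` appears in the code this commit touches.

(* Illustrative sketch: every name except maybe_generate_alert
 * (see the diff below) is a hypothetical stand-in. *)
let watch_membership ~poll_update ~maybe_generate_alert =
  let rec loop prev_hosts =
    (* Block until corosync-notifyd reports a membership change. *)
    let live_hosts = poll_update () in
    let missing_hosts =
      List.filter (fun h -> not (List.mem h live_hosts)) prev_hosts
    in
    let new_hosts =
      List.filter (fun h -> not (List.mem h prev_hosts)) live_hosts
    in
    if missing_hosts <> [] || new_hosts <> [] then
      (* Single call site: an API-triggered "clean" leave also surfaces
       * as a corosync-notifyd update, so it is not reported twice. *)
      maybe_generate_alert ~missing_hosts ~new_hosts ;
    loop live_hosts
  in
  loop []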

Signed-off-by: Vincent Liu <shuntian.liu2@cloud.com>
Vincent-lau committed Jun 18, 2024
1 parent 242e626 commit c0fe06a
Showing 1 changed file with 0 additions and 19 deletions.
ocaml/xapi/xapi_cluster_host.ml: 0 additions & 19 deletions

@@ -13,7 +13,6 @@
 *)
 
 open Xapi_clustering
-open Xapi_cluster_helpers
 open Ipaddr_rpc_type
 
 module D = Debug.Make (struct let name = "xapi_cluster_host" end)
@@ -55,20 +54,6 @@ let call_api_function_with_alert ~__context ~msg ~cls ~obj_uuid ~body
       raise err
   )
 
-let alert_for_cluster_host ~__context ~cluster_host ~missing_hosts ~new_hosts =
-  let num_hosts = Db.Cluster_host.get_all ~__context |> List.length in
-  let cluster = Db.Cluster_host.get_cluster ~__context ~self:cluster_host in
-  let quorum = Db.Cluster.get_quorum ~__context ~self:cluster |> Int64.to_int in
-  maybe_generate_alert ~__context ~missing_hosts ~new_hosts ~num_hosts ~quorum
-
-let alert_for_cluster_host_leave ~__context ~cluster_host =
-  alert_for_cluster_host ~__context ~cluster_host ~missing_hosts:[cluster_host]
-    ~new_hosts:[]
-
-let alert_for_cluster_host_join ~__context ~cluster_host =
-  alert_for_cluster_host ~__context ~cluster_host ~missing_hosts:[]
-    ~new_hosts:[cluster_host]
-
 (* Create xapi db object for cluster_host, resync_host calls clusterd *)
 let create_internal ~__context ~cluster ~host ~pIF : API.ref_Cluster_host =
   with_clustering_lock __LOC__ (fun () ->
@@ -81,7 +66,6 @@ let create_internal ~__context ~cluster ~host ~pIF : API.ref_Cluster_host =
         ~enabled:false ~current_operations:[] ~allowed_operations:[]
         ~other_config:[] ~joined:false ~live:false
         ~last_update_live:API.Date.epoch ;
-      alert_for_cluster_host_join ~__context ~cluster_host:ref ;
       ref
   )
 
@@ -274,14 +258,12 @@ let destroy_op ~__context ~self ~force =
   let result = local_fn (rpc ~__context) dbg in
   match Idl.IdM.run @@ Cluster_client.IDL.T.get result with
   | Ok () ->
-      alert_for_cluster_host_leave ~__context ~cluster_host:self ;
      Db.Cluster_host.destroy ~__context ~self ;
      debug "Cluster_host.%s was successful" fn_str ;
      Xapi_clustering.Daemon.disable ~__context
   | Error error ->
      warn "Error occurred during Cluster_host.%s" fn_str ;
      if force then (
-        alert_for_cluster_host_leave ~__context ~cluster_host:self ;
        let ref_str = Ref.string_of self in
        Db.Cluster_host.destroy ~__context ~self ;
        debug "Cluster_host %s force destroyed." ref_str
@@ -329,7 +311,6 @@ let forget ~__context ~self =
      Db.Cluster.set_pending_forget ~__context ~self:cluster ~value:[] ;
      (* must not disable the daemon here, because we declared another unreachable node dead,
       * not the current one *)
-      alert_for_cluster_host_leave ~__context ~cluster_host:self ;
      Db.Cluster_host.destroy ~__context ~self ;
      debug "Cluster_host.forget was successful"
   | Error error ->
