From b48e85e533baf494a1a28f2cf01752d18b548b45 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 16 Oct 2025 11:10:49 +0100 Subject: [PATCH 1/7] xapi: remove some usages of common, exception-raising functions The Not_found and hd exceptions keep popping up, and it's difficult to find them when there are no backtraces logged. Remove usages if them, even if they are not problematic so the actual problematic ones can be flushed out over time. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/create_storage.ml | 49 ++++++++++-------------- ocaml/xapi/system_domains.ml | 71 ----------------------------------- ocaml/xapi/system_domains.mli | 3 -- ocaml/xapi/xapi_ha.ml | 59 ++++++++++++++++------------- ocaml/xapi/xha_interface.ml | 3 +- quality-gate.sh | 2 +- 6 files changed, 54 insertions(+), 133 deletions(-) diff --git a/ocaml/xapi/create_storage.ml b/ocaml/xapi/create_storage.ml index 19aff8ecbbd..edc55020325 100644 --- a/ocaml/xapi/create_storage.ml +++ b/ocaml/xapi/create_storage.ml @@ -79,24 +79,20 @@ let maybe_create_pbd rpc session_id sr device_config me = (fun self -> Client.PBD.get_host ~rpc ~session_id ~self = me) pbds in - (* Check not more than 1 pbd in the database *) - let pbds = - if List.length pbds > 1 then ( - (* shouldn't happen... delete all but first pbd to make db consistent again *) - List.iter - (fun pbd -> Client.PBD.destroy ~rpc ~session_id ~self:pbd) - (List.tl pbds) ; - [List.hd pbds] - ) else - pbds - in - if pbds = [] (* If there's no PBD, create it *) then + let create () : [`PBD] Ref.t = Client.PBD.create ~rpc ~session_id ~host:me ~sR:sr ~device_config ~other_config:[] - else - List.hd pbds - -(* Otherwise, return the current one *) + in + (* Ensure there's a single PBD *) + match pbds with + | [] -> + ignore (create ()) + | [_] -> + () + | _ :: pbds -> + (* shouldn't happen... delete all but first pbd to make db consistent + again *) + List.iter (fun pbd -> Client.PBD.destroy ~rpc ~session_id ~self:pbd) pbds let maybe_remove_tools_sr rpc session_id __context = let srs = Db.SR.get_all ~__context in @@ -153,17 +149,13 @@ let initialise_storage (me : API.ref_host) rpc session_id __context : unit = List.filter (fun (_, pbd_rec) -> pbd_rec.API.pBD_host = master) pbds in let maybe_create_pbd_for_shared_sr s = - let _, mpbd_rec = - List.find (fun (_, pbdr) -> pbdr.API.pBD_SR = s) master_pbds - in - let master_devconf = mpbd_rec.API.pBD_device_config in - let my_devconf = List.remove_assoc "SRmaster" master_devconf in - (* this should never be used *) - maybe_create_pbd rpc session_id s my_devconf me + List.find_opt (fun (_, pbdr) -> pbdr.API.pBD_SR = s) master_pbds + |> Option.iter @@ fun (_, mpbd_rec) -> + let master_devconf = mpbd_rec.API.pBD_device_config in + let my_devconf = List.remove_assoc "SRmaster" master_devconf in + try maybe_create_pbd rpc session_id s my_devconf me with _ -> () in - List.iter - (fun s -> try ignore (maybe_create_pbd_for_shared_sr s) with _ -> ()) - shared_sr_refs + List.iter maybe_create_pbd_for_shared_sr shared_sr_refs in let other_config = try @@ -173,9 +165,8 @@ let initialise_storage (me : API.ref_host) rpc session_id __context : unit = in if not - (List.mem_assoc Xapi_globs.sync_create_pbds other_config - && List.assoc Xapi_globs.sync_create_pbds other_config - = Xapi_globs.sync_switch_off + (List.assoc_opt Xapi_globs.sync_create_pbds other_config + = Some Xapi_globs.sync_switch_off ) then ( debug "Creating PBDs for shared SRs" ; diff --git a/ocaml/xapi/system_domains.ml b/ocaml/xapi/system_domains.ml index 0453c205566..c610424aeef 100644 --- a/ocaml/xapi/system_domains.ml +++ b/ocaml/xapi/system_domains.ml @@ -148,36 +148,6 @@ let is_in_use ~__context ~self = else false -(* [wait_for ?timeout f] returns true if [f()] (called at 1Hz) returns true within - the [timeout] period and false otherwise *) -let wait_for ?(timeout = 120.) f = - let start = Unix.gettimeofday () in - let finished = ref false in - let success = ref false in - while not !finished do - let remaining = timeout -. (Unix.gettimeofday () -. start) in - if remaining < 0. then - finished := true - else - try - if f () then ( - success := true ; - finished := true - ) else - Thread.delay 1. - with _ -> Thread.delay 1. - done ; - !success - -let pingable ip () = - try - let (_ : string * string) = - Forkhelpers.execute_command_get_output "/bin/ping" - ["-c"; "1"; "-w"; "1"; ip] - in - true - with _ -> false - let queryable ~__context transport () = let open Xmlrpc_client in let tracing = Context.set_client_span __context in @@ -197,47 +167,6 @@ let queryable ~__context transport () = (Printexc.to_string e) ; false -let ip_of ~__context driver = - (* Find the VIF on the Host internal management network *) - let vifs = Db.VM.get_VIFs ~__context ~self:driver in - let hin = Helpers.get_host_internal_management_network ~__context in - let ip = - let vif = - try - List.find - (fun vif -> Db.VIF.get_network ~__context ~self:vif = hin) - vifs - with Not_found -> - failwith - (Printf.sprintf - "driver domain %s has no VIF on host internal management network" - (Ref.string_of driver) - ) - in - match Xapi_udhcpd.get_ip ~__context vif with - | Some (a, b, c, d) -> - Printf.sprintf "%d.%d.%d.%d" a b c d - | None -> - failwith - (Printf.sprintf - "driver domain %s has no IP on the host internal management \ - network" - (Ref.string_of driver) - ) - in - info "driver domain uuid:%s ip:%s" (Db.VM.get_uuid ~__context ~self:driver) ip ; - if not (wait_for (pingable ip)) then - failwith - (Printf.sprintf "driver domain %s is not responding to IP ping" - (Ref.string_of driver) - ) ; - if not (wait_for (queryable ~__context (Xmlrpc_client.TCP (ip, 80)))) then - failwith - (Printf.sprintf "driver domain %s is not responding to XMLRPC query" - (Ref.string_of driver) - ) ; - ip - type service = {uuid: string; ty: string; instance: string; url: string} [@@deriving rpc] diff --git a/ocaml/xapi/system_domains.mli b/ocaml/xapi/system_domains.mli index 36881ad865a..33df12a6f54 100644 --- a/ocaml/xapi/system_domains.mli +++ b/ocaml/xapi/system_domains.mli @@ -51,9 +51,6 @@ val is_in_use : __context:Context.t -> self:API.ref_VM -> bool val queryable : __context:Context.t -> Xmlrpc_client.transport -> unit -> bool (** [queryable ip port ()] returns true if [ip]:[port] responsds to an XMLRPC query *) -val ip_of : __context:Context.t -> API.ref_VM -> string -(** [ip_of __context vm] returns the IP of the given VM on the internal management network *) - (** One of many service running in a driver domain *) type service = {uuid: string; ty: string; instance: string; url: string} diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index 4d7714a8b83..5b1c51e5675 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -1869,10 +1869,7 @@ let enable __context heartbeat_srs configuration = with _ -> false in if not alive then - raise - (Api_errors.Server_error - (Api_errors.host_offline, [Ref.string_of host]) - ) + raise Api_errors.(Server_error (host_offline, [Ref.string_of host])) ) (Db.Host.get_all ~__context) ; let pool = Helpers.get_pool ~__context in @@ -1897,20 +1894,23 @@ let enable __context heartbeat_srs configuration = else heartbeat_srs in - if possible_srs = [] then - raise (Api_errors.Server_error (Api_errors.cannot_create_state_file, [])) ; - (* For the moment we'll create a state file in one compatible SR since the xHA component only handles one *) - let srs = [List.hd possible_srs] in + (* For the moment we'll create a state file in one compatible SR since the + xHA component only handles one *) + let sr = + match possible_srs with + | [] -> + raise Api_errors.(Server_error (cannot_create_state_file, [])) + | sr :: _ -> + sr + in List.iter (fun sr -> let vdi = Xha_statefile.find_or_create ~__context ~sr ~cluster_stack in statefile_vdis := vdi :: !statefile_vdis ) - srs ; + [sr] ; (* For storing the database, assume there is only one SR *) - let database_vdi = - Xha_metadata_vdi.find_or_create ~__context ~sr:(List.hd srs) - in + let database_vdi = Xha_metadata_vdi.find_or_create ~__context ~sr in database_vdis := database_vdi :: !database_vdis ; (* Record the statefile UUIDs in the Pool.ha_statefile set *) Db.Pool.set_ha_statefiles ~__context ~self:pool @@ -1991,14 +1991,16 @@ let enable __context heartbeat_srs configuration = (ExnHelper.string_of_exn e) ) errors ; - if errors <> [] then ( - (* Perform a disable since the pool HA state isn't consistent *) - error "Attempting to disable HA pool-wide" ; - Helpers.log_exn_continue - "Disabling HA after a failure joining all hosts to the liveset" - disable_internal __context ; - raise (snd (List.hd errors)) - ) ; + List.iter + (fun (_, exn) -> + (* Perform a disable since the pool HA state isn't consistent *) + error "Attempting to disable HA pool-wide" ; + Helpers.log_exn_continue + "Disabling HA after a failure joining all hosts to the liveset" + disable_internal __context ; + raise exn + ) + errors ; (* We have to set the HA enabled flag before forcing a database resynchronisation *) Db.Pool.set_ha_enabled ~__context ~self:pool ~value:true ; debug "HA enabled" ; @@ -2036,13 +2038,16 @@ let enable __context heartbeat_srs configuration = (ExnHelper.string_of_exn e) ) errors ; - if errors <> [] then ( - (* Perform a disable since the pool HA state isn't consistent *) - error "Attempting to disable HA pool-wide" ; - Helpers.log_exn_continue "Disabling HA after a failure during enable" - disable_internal __context ; - raise (snd (List.hd errors)) - ) ; + List.iter + (fun (_, exn) -> + (* Perform a disable since the pool HA state isn't consistent *) + error "Attempting to disable HA pool-wide" ; + Helpers.log_exn_continue + "Disabling HA after a failure during enable" disable_internal + __context ; + raise exn + ) + errors ; (* Update the allowed_operations on the HA volumes to prevent people thinking they can mess with them *) List.iter (fun vdi -> Xapi_vdi.update_allowed_operations ~__context ~self:vdi) diff --git a/ocaml/xapi/xha_interface.ml b/ocaml/xapi/xha_interface.ml index 0935e06619d..82ac381519d 100644 --- a/ocaml/xapi/xha_interface.ml +++ b/ocaml/xapi/xha_interface.ml @@ -32,8 +32,7 @@ let xml_element_has_name name element = (** Returns the first element with the specified name from the given element list. *) let first_xml_element_with_name elements name = - try Some (List.find (xml_element_has_name name) elements) - with Not_found -> None + List.find_opt (xml_element_has_name name) elements (** Parses an XML element of the form "value". Returns a (name, value) string pair, where the arguments diff --git a/quality-gate.sh b/quality-gate.sh index 84919c51a1c..1a99145da96 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=274 + N=268 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From 1e6ad2e74d7fbcf6dfb551adc3ab69c0d33b0059 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 30 Oct 2025 10:44:46 +0100 Subject: [PATCH 2/7] xapi-consts: use a variant to reify valid values of cluster stack Until now, the valid string values were not written anywhere, change the situation. Ideally this would be done using an enum in the idl, but unfortunately this changes types of existing parameters in API calls, so it's quite risky. Instead have a conservative approach to only enumerate the valid values and make Constants the only source of truth for these values, including default ones, which were scatterred around previously. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-consts/constants.ml | 25 +++++++++++++++++++++---- ocaml/xapi/xapi_globs.ml | 2 +- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 524d7ab07a5..c2b77993be5 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -212,12 +212,29 @@ let ballooning_enabled = "ballooning.enabled" let redo_log_enabled = "redo_log.enabled" (* Valid cluster stack values *) -let ha_cluster_stack = "ha_cluster_stack" +module Ha_cluster_stack = struct + type t = Xhad | Corosync -let default_smapiv3_cluster_stack = "corosync" + let key = "ha_cluster_stack" -(* Note: default without clustering is in !Xapi_globs.default_cluster_stack *) -let supported_smapiv3_cluster_stacks = ["corosync"] + let to_string = function Xhad -> "xhad" | Corosync -> "corosync" + + let of_string = function + | "xhad" -> + Some Xhad + | "corosync" -> + Some Corosync + | _ -> + None +end + +let ha_cluster_stack = Ha_cluster_stack.key + +let default_cluster_stack = Ha_cluster_stack.(to_string Xhad) + +let default_smapiv3_cluster_stack = Ha_cluster_stack.(to_string Corosync) + +let supported_smapiv3_cluster_stacks = [default_smapiv3_cluster_stack] (* Set in the local db to cause us to emit an alert when we come up as a master after a transition or HA failover *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index fcbc9174e9d..e7853ef53f2 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -858,7 +858,7 @@ let migration_https_only = ref true let cluster_stack_root = ref "/usr/libexec/xapi/cluster-stack" -let cluster_stack_default = ref "xhad" +let cluster_stack_default = ref Constants.default_cluster_stack let xen_cmdline_path = ref "/opt/xensource/libexec/xen-cmdline" From 2a5abc50e758b5bae2f03715936cddbde1ca44ef Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 30 Oct 2025 10:51:02 +0100 Subject: [PATCH 3/7] xapi: open firewall for XHAd only if xHAd is actually selected Previously it was opened if the default stack was selected, but this could actually be different from XHAd Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_ha.ml | 4 +++- ocaml/xapi/xapi_host.ml | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index 5b1c51e5675..c0a37501208 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -969,7 +969,9 @@ let redo_log_ha_enabled_at_startup () = let update_ha_firewalld_service status = (* Only xha needs to enable firewalld service. Other HA cluster stacks don't need. *) - if Localdb.get Constants.ha_cluster_stack = !Xapi_globs.cluster_stack_default + if + Localdb.get Constants.ha_cluster_stack + = Constants.Ha_cluster_stack.(to_string Xhad) then let module Fw = ( val Firewall.firewall_provider !Xapi_globs.firewall_backend diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 90062c7785e..e0ab0b696b1 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3417,9 +3417,12 @@ let update_firewalld_service_status ~__context = | Xenha -> (* Only xha needs to enable firewalld service. Other HA cluster stacks don't need. *) - bool_of_string (Localdb.get Constants.ha_armed) - && Localdb.get Constants.ha_cluster_stack - = !Xapi_globs.cluster_stack_default + let is_armed () = bool_of_string (Localdb.get Constants.ha_armed) in + let uses_xhad () = + Localdb.get Constants.ha_cluster_stack + = Constants.Ha_cluster_stack.(to_string Xhad) + in + is_armed () && uses_xhad () in List.iter (fun s -> if is_enabled s then enable_firewalld_service s) From 71aea4dcd7890c38c4f7dd197bbfbee2efe998d8 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 16 Oct 2025 15:44:25 +0100 Subject: [PATCH 4/7] localdb: expose option-based get functions Allows callers to avoid exceptions, the previous get is now called get_exn so it's clear which users have to be have to be mindful of exceptions. Not all get_exn were converted to get, previous behaviour was widespread, and doing the change without changing behaviour is not trivial, better to do it only once its detected. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/localdb.ml | 26 +++++++++++++------ ocaml/xapi/localdb.mli | 20 ++++++++++++--- ocaml/xapi/xapi.ml | 24 ++++++++---------- ocaml/xapi/xapi_ha.ml | 40 ++++++++++++++++-------------- ocaml/xapi/xapi_host.ml | 11 +++++--- ocaml/xapi/xapi_host_crashdump.ml | 4 +-- ocaml/xapi/xapi_host_helpers.ml | 9 +++---- ocaml/xapi/xapi_pool.ml | 9 +++---- ocaml/xapi/xapi_pool_transition.ml | 6 ++--- ocaml/xapi/xha_scripts.ml | 2 +- 10 files changed, 87 insertions(+), 64 deletions(-) diff --git a/ocaml/xapi/localdb.ml b/ocaml/xapi/localdb.ml index 3382c42e32a..7658e58523b 100644 --- a/ocaml/xapi/localdb.ml +++ b/ocaml/xapi/localdb.ml @@ -64,17 +64,27 @@ exception Missing_key of string let m = Mutex.create () let get (key : string) = + let __FUN = __FUNCTION__ in + let ( let* ) = Option.bind in with_lock m (fun () -> - assert_loaded () ; - match Hashtbl.find_opt db key with - | Some x -> - x - | None -> - raise (Missing_key key) + let* () = + try assert_loaded () ; Some () + with e -> + warn "%s: unexpected error, ignoring it: %s" __FUN + (Printexc.to_string e) ; + None + in + Hashtbl.find_opt db key ) -let get_with_default (key : string) (default : string) = - try get key with Missing_key _ -> default +let get_exn key = + match get key with Some x -> x | None -> raise (Missing_key key) + +let get_of_string of_string key = Option.bind (get key) of_string + +let get_bool key = get_of_string bool_of_string_opt key + +let get_int key = get_of_string int_of_string_opt key (* Returns true if a change was made and should be flushed *) let put_one (key : string) (v : string) = diff --git a/ocaml/xapi/localdb.mli b/ocaml/xapi/localdb.mli index 3608241fc10..ae087aaa2e3 100644 --- a/ocaml/xapi/localdb.mli +++ b/ocaml/xapi/localdb.mli @@ -18,12 +18,24 @@ (** Thrown when a particular named key could not be found. *) exception Missing_key of string -val get : string -> string +val get : string -> string option (** Retrieves a value *) -val get_with_default : string -> string -> string -(** [get_with_default key default] returns the value associated with [key], - or [default] if the key is missing. *) +val get_exn : string -> string +(** Retrieves the value for the key, raises Missing_key when the key is not + present *) + +val get_bool : string -> bool option +(** Retrieves the value for the key, returns a value when it's found and is a + valid boolean, otherwise is [None] *) + +val get_int : string -> int option +(** Retrieves the value for the key, returns a value when it's found and is a + valid int, otherwise is [None] *) + +val get_of_string : (string -> 'a option) -> string -> 'a option +(** [get_of_string of_string key] retrieves the value for [key], and if it + exists, processes it with [of_string], otherwise it's [None] *) val put : string -> string -> unit (** Inserts a value into the database, only returns when the insertion has diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 09ce5ba9550..ab04cde143d 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -288,12 +288,9 @@ let synchronize_certificates_with_coordinator ~__context = (* Make sure the local database can be read *) let init_local_database () = - ( try - let (_ : string) = Localdb.get Constants.ha_armed in - () - with Localdb.Missing_key _ -> - Localdb.put Constants.ha_armed "false" ; - debug "%s = 'false' (by default)" Constants.ha_armed + if Option.is_none (Localdb.get_bool Constants.ha_armed) then ( + Localdb.put Constants.ha_armed "false" ; + debug "%s = 'false' (by default)" Constants.ha_armed ) ; (* Add the local session check hook *) Session_check.check_local_session_hook := @@ -519,13 +516,14 @@ let start_ha () = (** Enable and load the redo log if we are the master, the local-DB flag is set * and HA is disabled *) let start_redo_log () = + let redo_log_enabled () = + Localdb.get_bool Constants.redo_log_enabled |> Option.value ~default:false + in + let ha_armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in try - if - Pool_role.is_master () - && bool_of_string - (Localdb.get_with_default Constants.redo_log_enabled "false") - && not (bool_of_string (Localdb.get Constants.ha_armed)) - then ( + if Pool_role.is_master () && redo_log_enabled () && not (ha_armed ()) then ( debug "Redo log was enabled when shutting down, so restarting it" ; Static_vdis.reattempt_on_boot_attach () ; (* enable the use of the redo log *) @@ -610,7 +608,7 @@ let resynchronise_ha_state () = let pool = Helpers.get_pool ~__context in let pool_ha_enabled = Db.Pool.get_ha_enabled ~__context ~self:pool in let local_ha_enabled = - bool_of_string (Localdb.get Constants.ha_armed) + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in match (local_ha_enabled, pool_ha_enabled) with | true, true -> diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index c0a37501208..2ec62677884 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -76,8 +76,11 @@ let propose_master () = (** Returns true if local failover decisions have not been disabled on this node *) let local_failover_decisions_are_ok () = - try not (bool_of_string (Localdb.get Constants.ha_disable_failover_decisions)) - with _ -> true + let disabled = + Localdb.get_bool Constants.ha_disable_failover_decisions + |> Option.value ~default:false + in + not disabled (** Since the liveset info doesn't include the host IP address, we persist these ourselves *) let write_uuid_to_ip_mapping ~__context = @@ -91,8 +94,11 @@ let write_uuid_to_ip_mapping ~__context = (** Since the liveset info doesn't include the host IP address, we persist these ourselves *) let get_uuid_to_ip_mapping () = - let v = Localdb.get Constants.ha_peers in - String_unmarshall_helper.map (fun x -> x) (fun x -> x) v + match Localdb.get Constants.ha_peers with + | Some peers -> + String_unmarshall_helper.map (fun k -> k) (fun v -> v) peers + | None -> + [] (** Without using the Pool's database, returns the IP address of a particular host named by UUID. *) @@ -303,7 +309,7 @@ module Monitor = struct let statefiles = Xha_statefile.list_existing_statefiles () in debug "HA background thread starting" ; (* Grab the base timeout value so we can cook the reported latencies *) - let base_t = int_of_string (Localdb.get Constants.ha_base_t) in + let base_t = int_of_string (Localdb.get_exn Constants.ha_base_t) in let timeouts = Timeouts.derive base_t in (* Set up our per-host alert triggers *) let localhost_uuid = Helpers.get_localhost_uuid () in @@ -501,8 +507,6 @@ module Monitor = struct ) in - (* let planned_for = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in *) - (* First consider whether VM failover actions need to happen. Convert the liveset into a list of Host references used by the VM failover code *) let liveset_uuids = @@ -629,11 +633,9 @@ module Monitor = struct (* and yet has no statefile access *) in let all_live_nodes_lost_statefile = - List.fold_left ( && ) true - (List.map - (fun (_, xha_host) -> relying_on_rule_2 xha_host) - host_host_table - ) + List.for_all + (fun (_, xha_host) -> relying_on_rule_2 xha_host) + host_host_table in warning_all_live_nodes_lost_statefile all_live_nodes_lost_statefile ; @@ -971,6 +973,7 @@ let update_ha_firewalld_service status = need. *) if Localdb.get Constants.ha_cluster_stack + |> Option.value ~default:!Xapi_globs.cluster_stack_default = Constants.Ha_cluster_stack.(to_string Xhad) then let module Fw = @@ -986,8 +989,10 @@ let ha_start_daemon () = () let on_server_restart () = - let armed = bool_of_string (Localdb.get Constants.ha_armed) in - if armed then ( + let armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in + if armed () then ( debug "HA is supposed to be armed" ; (* Make sure daemons are up *) let finished = ref false in @@ -995,10 +1000,7 @@ let on_server_restart () = XXX we might need some kind of user-override *) while not !finished do (* If someone has called Host.emergency_ha_disable in the background then we notice the change here *) - if - not - (try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false) - then ( + if not (armed ()) then ( warn "ha_start_daemon aborted because someone has called \ Host.emergency_ha_disable" ; @@ -1149,7 +1151,7 @@ let ha_stop_daemon __context _localhost = let emergency_ha_disable __context soft = let ha_armed = - try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in if not ha_armed then if soft then diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index e0ab0b696b1..1699c698965 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -79,8 +79,8 @@ let assert_safe_to_reenable ~__context ~self ~user_request = Repository_helpers.assert_no_host_pending_mandatory_guidance ~__context ~host:self ; let host_disabled_until_reboot = - try bool_of_string (Localdb.get Constants.host_disabled_until_reboot) - with _ -> false + Localdb.get_bool Constants.host_disabled_until_reboot + |> Option.value ~default:false in if host_disabled_until_reboot then raise @@ -88,7 +88,7 @@ let assert_safe_to_reenable ~__context ~self ~user_request = (Api_errors.host_disabled_until_reboot, [Ref.string_of self]) ) ; let host_auto_enable = - try bool_of_string (Localdb.get Constants.host_auto_enable) with _ -> true + Localdb.get_bool Constants.host_auto_enable |> Option.value ~default:true in if (not host_auto_enable) && not user_request then raise @@ -3417,9 +3417,12 @@ let update_firewalld_service_status ~__context = | Xenha -> (* Only xha needs to enable firewalld service. Other HA cluster stacks don't need. *) - let is_armed () = bool_of_string (Localdb.get Constants.ha_armed) in + let is_armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in let uses_xhad () = Localdb.get Constants.ha_cluster_stack + |> Option.value ~default:!Xapi_globs.cluster_stack_default = Constants.Ha_cluster_stack.(to_string Xhad) in is_armed () && uses_xhad () diff --git a/ocaml/xapi/xapi_host_crashdump.ml b/ocaml/xapi/xapi_host_crashdump.ml index 02e8303d777..b7e9eedd74e 100644 --- a/ocaml/xapi/xapi_host_crashdump.ml +++ b/ocaml/xapi/xapi_host_crashdump.ml @@ -71,8 +71,8 @@ let resynchronise ~__context ~host = let gone_away = Listext.List.set_difference db_filenames real_filenames and arrived = Listext.List.set_difference real_filenames db_filenames in let was_shutdown_cleanly = - try bool_of_string (Localdb.get Constants.host_restarted_cleanly) - with _ -> false + Localdb.get_bool Constants.host_restarted_cleanly + |> Option.value ~default:false in Localdb.put Constants.host_restarted_cleanly "false" ; (* If HA is enabled AND no crashdump appeared AND we weren't shutdown cleanly then assume it was a fence. *) diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index da837f9329a..de1805082b8 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -380,7 +380,7 @@ let consider_enabling_host_nolock ~__context = Disabled hosts are excluded from the HA planning calculations. Otherwise a host may boot, fail to plug in a PBD and cause all protected VMs to suddenly become non-agile. *) let ha_enabled = - try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in let localhost = Helpers.get_localhost ~__context in let pbds = Db.Host.get_PBDs ~__context ~self:localhost in @@ -423,8 +423,7 @@ let consider_enabling_host_nolock ~__context = f () in let host_auto_enable = - try bool_of_string (Localdb.get Constants.host_auto_enable) - with _ -> true + Localdb.get_bool Constants.host_auto_enable |> Option.value ~default:true in if !Xapi_globs.on_system_boot then ( debug "Host.enabled: system has just restarted" ; @@ -451,8 +450,8 @@ let consider_enabling_host_nolock ~__context = until manually re-enabled by the user" ) ) else if - try bool_of_string (Localdb.get Constants.host_disabled_until_reboot) - with _ -> false + Localdb.get_bool Constants.host_disabled_until_reboot + |> Option.value ~default:false then debug "Host.enabled: system not just rebooted but host_disabled_until_reboot \ diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index ccbd779d462..cbb39e28adb 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1880,15 +1880,14 @@ let exchange_ca_certificates_on_join ~__context ~import ~export : in Cert_distrib.exchange_ca_certificates_with_joiner ~__context ~import ~export -(* Assume that db backed up from master will be there and ready to go... *) let emergency_transition_to_master ~__context = - if Localdb.get Constants.ha_armed = "true" then - raise (Api_errors.Server_error (Api_errors.ha_is_enabled, [])) ; + if Localdb.get_bool Constants.ha_armed |> Option.value ~default:false then + raise Api_errors.(Server_error (ha_is_enabled, [])) ; Xapi_pool_transition.become_master () let emergency_reset_master ~__context ~master_address = - if Localdb.get Constants.ha_armed = "true" then - raise (Api_errors.Server_error (Api_errors.ha_is_enabled, [])) ; + if Localdb.get_bool Constants.ha_armed |> Option.value ~default:false then + raise Api_errors.(Server_error (ha_is_enabled, [])) ; let master_address = Helpers.gethostbyname master_address in Xapi_pool_transition.become_another_masters_slave master_address diff --git a/ocaml/xapi/xapi_pool_transition.ml b/ocaml/xapi/xapi_pool_transition.ml index 8f6a315f591..a35b736c3f7 100644 --- a/ocaml/xapi/xapi_pool_transition.ml +++ b/ocaml/xapi/xapi_pool_transition.ml @@ -62,7 +62,7 @@ let run_external_scripts becoming_master = order in let already_run = - try bool_of_string (Localdb.get Constants.master_scripts) with _ -> false + Localdb.get_bool Constants.master_scripts |> Option.value ~default:false in (* Only do anything if we're switching mode *) if already_run <> becoming_master then ( @@ -228,8 +228,8 @@ let become_another_masters_slave master_address = (** If we just transitioned slave -> master (as indicated by the localdb flag) then generate a single alert *) let consider_sending_alert __context () = if - try bool_of_string (Localdb.get Constants.this_node_just_became_master) - with _ -> false + Localdb.get_bool Constants.this_node_just_became_master + |> Option.value ~default:false then let obj_uuid = Helpers.get_localhost_uuid () in let name, priority = Api_messages.pool_master_transition in diff --git a/ocaml/xapi/xha_scripts.ml b/ocaml/xapi/xha_scripts.ml index c8f87e412c1..669664112a5 100644 --- a/ocaml/xapi/xha_scripts.ml +++ b/ocaml/xapi/xha_scripts.ml @@ -17,7 +17,7 @@ module D = Debug.Make (struct let name = "xapi_ha" end) open D let ha_dir () = - let stack = Localdb.get Constants.ha_cluster_stack in + let stack = Localdb.get_exn Constants.ha_cluster_stack in Filename.concat !Xapi_globs.cluster_stack_root stack let ha_set_pool_state = "ha_set_pool_state" From 6bf27321a38460bea44e329b48ffaf35d5c34563 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 17 Oct 2025 16:06:09 +0100 Subject: [PATCH 5/7] xapi_ha: avoid raising Not_found when joining a liveset Joining the liveset has been found to raise `Not_found` in some cases. Transform these exceptions to others that are more readable and show the exact cause. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_ha.ml | 110 +++++++++++++++++++++++------------------- quality-gate.sh | 2 +- 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index 2ec62677884..4aee4213ec8 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -104,29 +104,28 @@ let get_uuid_to_ip_mapping () = named by UUID. *) let address_of_host_uuid uuid = let table = get_uuid_to_ip_mapping () in - if not (List.mem_assoc uuid table) then ( - error "Failed to find the IP address of host UUID %s" uuid ; - raise Not_found - ) else - List.assoc uuid table + List.assoc_opt uuid table |> Option.to_result ~none:Not_found (** Without using the Pool's database, returns the UUID of a particular host named by heartbeat IP address. This is only necesary because the liveset info doesn't include the host IP address *) let uuid_of_host_address address = let table = List.map (fun (k, v) -> (v, k)) (get_uuid_to_ip_mapping ()) in - match List.assoc_opt address table with - | None -> - error "Failed to find the UUID address of host with address %s" address ; - raise Not_found - | Some uuid_str -> ( - match Uuidx.of_string uuid_str with - | None -> - error "Failed parse UUID of host with address %s" address ; - raise (Invalid_argument "Invalid UUID") - | Some uuid -> - uuid - ) + let invalid_uuid = Invalid_argument "Invalid UUID" in + let to_uuid str = + Uuidx.of_string str |> Option.to_result ~none:invalid_uuid + in + List.assoc_opt address table + |> Option.to_result ~none:Not_found + |> Fun.flip Result.bind to_uuid + +let ok_or_raise map_error = function Ok v -> v | Error exn -> map_error exn + +let master_address_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's IP: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) (** Called in two circumstances: 1. When I started up I thought I was the master but my proposal was rejected by the @@ -145,7 +144,9 @@ let on_master_failure () = done in let become_slave_of uuid = - let address = address_of_host_uuid uuid in + let address = + address_of_host_uuid uuid |> ok_or_raise (master_address_exn __FUNCTION__) + in info "This node will become the slave of host %s (%s)" uuid address ; Xapi_pool_transition.become_another_masters_slave address ; (* XXX CA-16388: prevent blocking *) @@ -170,19 +171,17 @@ let on_master_failure () = "ha_can_not_be_master_on_next_boot set: I cannot be master; looking \ for another master" ; let liveset = query_liveset () in + let open Xha_interface.LiveSetInformation in match Hashtbl.fold (fun uuid host acc -> - if - host.Xha_interface.LiveSetInformation.Host.master - && host.Xha_interface.LiveSetInformation.Host.liveness - (* CP-25481: a dead host may still have the master lock *) - then + (* CP-25481: a dead host may still have the master lock *) + if host.Host.master && host.Host.liveness then uuid :: acc else acc ) - liveset.Xha_interface.LiveSetInformation.hosts [] + liveset.hosts [] with | [] -> info "no other master exists yet; waiting 5 seconds and retrying" ; @@ -197,6 +196,18 @@ let on_master_failure () = ) done +let master_uuid_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's UUID: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) + +let master_not_in_liveset_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's info: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) + module Timeouts = struct type t = { heart_beat_interval: int @@ -463,16 +474,17 @@ module Monitor = struct (* WARNING: must not touch the database or perform blocking I/O *) let process_liveset_on_slave liveset = let address = Pool_role.get_master_address () in - let master_uuid = uuid_of_host_address address in + let master_uuid = + uuid_of_host_address address + |> ok_or_raise (master_uuid_exn __FUNCTION__) + in + let open Xha_interface.LiveSetInformation in let master_info = - Hashtbl.find liveset.Xha_interface.LiveSetInformation.hosts - master_uuid + Hashtbl.find_opt liveset.hosts master_uuid + |> Option.to_result ~none:Not_found + |> ok_or_raise (master_not_in_liveset_exn __FUNCTION__) in - if - true - && master_info.Xha_interface.LiveSetInformation.Host.liveness - && master_info.Xha_interface.LiveSetInformation.Host.master - then + if master_info.Host.liveness && master_info.Host.master then debug "The node we think is the master is still alive and marked \ as master; this is OK" @@ -1389,6 +1401,7 @@ let preconfigure_host __context localhost statevdis metadata_vdi generation = Localdb.put Constants.ha_base_t (string_of_int base_t) let join_liveset __context host = + let __FUN = __FUNCTION__ in info "Host.ha_join_liveset host = %s" (Ref.string_of host) ; ha_start_daemon () ; Localdb.put Constants.ha_disable_failover_decisions "false" ; @@ -1406,7 +1419,10 @@ let join_liveset __context host = (* If this host is a slave then we must wait to confirm that the master manages to assert itself, otherwise our monitoring thread might attempt a hostile takeover *) let master_address = Pool_role.get_master_address () in - let master_uuid = uuid_of_host_address master_address in + let master_uuid = + uuid_of_host_address master_address + |> ok_or_raise (master_uuid_exn __FUN) + in let master_found = ref false in while not !master_found do (* It takes a non-trivial amount of time for the master to assert itself: we might @@ -1414,30 +1430,24 @@ let join_liveset __context host = should wait. *) Thread.delay 5. ; let liveset = query_liveset () in - debug "Liveset: %s" - (Xha_interface.LiveSetInformation.to_summary_string liveset) ; - if - liveset.Xha_interface.LiveSetInformation.status - = Xha_interface.LiveSetInformation.Status.Online - then + let open Xha_interface.LiveSetInformation in + debug "Liveset: %s" (to_summary_string liveset) ; + if liveset.status = Status.Online then (* 'master' is the node we believe should become the xHA-level master initially *) let master = - Hashtbl.find liveset.Xha_interface.LiveSetInformation.hosts - master_uuid + Hashtbl.find_opt liveset.hosts master_uuid + |> Option.to_result ~none:Not_found + |> ok_or_raise (master_not_in_liveset_exn __FUN) in - if master.Xha_interface.LiveSetInformation.Host.master then ( + if master.Host.master then ( info "existing master has successfully asserted itself" ; master_found := true (* loop will terminate *) ) else if false - || (not master.Xha_interface.LiveSetInformation.Host.liveness) - || master - .Xha_interface.LiveSetInformation.Host.state_file_corrupted - || (not - master - .Xha_interface.LiveSetInformation.Host.state_file_access - ) - || master.Xha_interface.LiveSetInformation.Host.excluded + || (not master.Host.liveness) + || master.Host.state_file_corrupted + || (not master.Host.state_file_access) + || master.Host.excluded then ( error "Existing master has failed during HA enable process" ; failwith "Existing master failed during HA enable process" diff --git a/quality-gate.sh b/quality-gate.sh index 1a99145da96..9a2964f71c7 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -110,7 +110,7 @@ unixgetenv () { } hashtblfind () { - N=35 + N=33 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) From 3122667b788626c4d05370f873804d12fc1b8846 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 21 Oct 2025 14:42:27 +0100 Subject: [PATCH 6/7] quality-gate: capture more unnecessary List.length cases Signed-off-by: Pau Ruiz Safont --- ocaml/sdk-gen/c/gen_c_binding.ml | 2 +- ocaml/xapi/repository.ml | 2 +- ocaml/xapi/xapi_vm_lifecycle.ml | 4 +--- quality-gate.sh | 2 ++ 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/sdk-gen/c/gen_c_binding.ml b/ocaml/sdk-gen/c/gen_c_binding.ml index ebf5acae159..7f27cff3385 100644 --- a/ocaml/sdk-gen/c/gen_c_binding.ml +++ b/ocaml/sdk-gen/c/gen_c_binding.ml @@ -346,7 +346,7 @@ and gen_impl cls = ; ("async_params", `A (List.map paramJson (asyncParams msg))) ; ("msg_params", `A (List.map paramJson msg.msg_params)) ; ("abstract_result_type", `String (result_type msg)) - ; ("has_params", `Bool (List.length msg.msg_params <> 0)) + ; ("has_params", `Bool (msg.msg_params <> [])) ; ("param_count", `String (string_of_int (List.length msg.msg_params))) ; ("has_result", `Bool (String.compare (result_type msg) "" <> 0)) ; ("init_result", `Bool (init_result msg)) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index b1f6a2875ab..24ec680d3e1 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -232,7 +232,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> let config_repo config = - if List.length config <> 0 then (* Set params to yum/dnf *) + if config <> [] then (* Set params to yum/dnf *) let Pkg_mgr.{cmd; params} = Pkgs.config_repo ~repo_name ~config in ignore (Helpers.call_script ~log_output:Helpers.On_failure cmd params) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index b3d3ec8488d..14290421fb4 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -507,9 +507,7 @@ let check_operation_error ~__context ~ref = (* if other operations are in progress, check that the new operation is allowed concurrently with them. *) let current_error = check current_error (fun () -> - if - List.length current_ops <> 0 - && not (is_allowed_concurrently ~op ~current_ops) + if current_ops <> [] && not (is_allowed_concurrently ~op ~current_ops) then report_concurrent_operations_error ~current_ops ~ref_str else diff --git a/quality-gate.sh b/quality-gate.sh index 9a2964f71c7..b2345f75ef7 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -130,7 +130,9 @@ unnecessary-length () { UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*=+\s*List.length"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<>\s*0"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<>\s*List.length"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<\s*1"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "1\s*>\s*List.length"))) if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then From 17e0eddef3b69ea14158bc0589bb7a58f33d6d55 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 3 Nov 2025 15:15:40 +0000 Subject: [PATCH 7/7] xapi_ha: add more information to not found errors When joining a liveset, Not_found errors could be raised, but the users did not have information on what was that it could not be found. Add which IP address and which UUID could not be found. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_ha.ml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index 4aee4213ec8..1b62a99d250 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -42,6 +42,10 @@ let ha_redo_log = (*********************************************************************************************) (* Interface with the low-level HA subsystem *) +exception Address_not_found of string + +exception Uuid_not_found of string + (** Returns the current live set info *) let query_liveset () = let txt = call_script ~log_output:On_failure ha_query_liveset [] in @@ -104,7 +108,8 @@ let get_uuid_to_ip_mapping () = named by UUID. *) let address_of_host_uuid uuid = let table = get_uuid_to_ip_mapping () in - List.assoc_opt uuid table |> Option.to_result ~none:Not_found + let uuid_not_found = Uuid_not_found uuid in + List.assoc_opt uuid table |> Option.to_result ~none:uuid_not_found (** Without using the Pool's database, returns the UUID of a particular host named by heartbeat IP address. This is only necesary because the liveset info doesn't include @@ -112,11 +117,12 @@ let address_of_host_uuid uuid = let uuid_of_host_address address = let table = List.map (fun (k, v) -> (v, k)) (get_uuid_to_ip_mapping ()) in let invalid_uuid = Invalid_argument "Invalid UUID" in + let address_not_found = Address_not_found address in let to_uuid str = Uuidx.of_string str |> Option.to_result ~none:invalid_uuid in List.assoc_opt address table - |> Option.to_result ~none:Not_found + |> Option.to_result ~none:address_not_found |> Fun.flip Result.bind to_uuid let ok_or_raise map_error = function Ok v -> v | Error exn -> map_error exn @@ -479,9 +485,12 @@ module Monitor = struct |> ok_or_raise (master_uuid_exn __FUNCTION__) in let open Xha_interface.LiveSetInformation in + let uuid_not_found = + Uuid_not_found (Uuidx.to_string master_uuid) + in let master_info = Hashtbl.find_opt liveset.hosts master_uuid - |> Option.to_result ~none:Not_found + |> Option.to_result ~none:uuid_not_found |> ok_or_raise (master_not_in_liveset_exn __FUNCTION__) in if master_info.Host.liveness && master_info.Host.master then