diff --git a/ocaml/sdk-gen/c/gen_c_binding.ml b/ocaml/sdk-gen/c/gen_c_binding.ml index ebf5acae159..7f27cff3385 100644 --- a/ocaml/sdk-gen/c/gen_c_binding.ml +++ b/ocaml/sdk-gen/c/gen_c_binding.ml @@ -346,7 +346,7 @@ and gen_impl cls = ; ("async_params", `A (List.map paramJson (asyncParams msg))) ; ("msg_params", `A (List.map paramJson msg.msg_params)) ; ("abstract_result_type", `String (result_type msg)) - ; ("has_params", `Bool (List.length msg.msg_params <> 0)) + ; ("has_params", `Bool (msg.msg_params <> [])) ; ("param_count", `String (string_of_int (List.length msg.msg_params))) ; ("has_result", `Bool (String.compare (result_type msg) "" <> 0)) ; ("init_result", `Bool (init_result msg)) diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 524d7ab07a5..c2b77993be5 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -212,12 +212,29 @@ let ballooning_enabled = "ballooning.enabled" let redo_log_enabled = "redo_log.enabled" (* Valid cluster stack values *) -let ha_cluster_stack = "ha_cluster_stack" +module Ha_cluster_stack = struct + type t = Xhad | Corosync -let default_smapiv3_cluster_stack = "corosync" + let key = "ha_cluster_stack" -(* Note: default without clustering is in !Xapi_globs.default_cluster_stack *) -let supported_smapiv3_cluster_stacks = ["corosync"] + let to_string = function Xhad -> "xhad" | Corosync -> "corosync" + + let of_string = function + | "xhad" -> + Some Xhad + | "corosync" -> + Some Corosync + | _ -> + None +end + +let ha_cluster_stack = Ha_cluster_stack.key + +let default_cluster_stack = Ha_cluster_stack.(to_string Xhad) + +let default_smapiv3_cluster_stack = Ha_cluster_stack.(to_string Corosync) + +let supported_smapiv3_cluster_stacks = [default_smapiv3_cluster_stack] (* Set in the local db to cause us to emit an alert when we come up as a master after a transition or HA failover *) diff --git a/ocaml/xapi/create_storage.ml b/ocaml/xapi/create_storage.ml index 19aff8ecbbd..edc55020325 100644 --- a/ocaml/xapi/create_storage.ml +++ b/ocaml/xapi/create_storage.ml @@ -79,24 +79,20 @@ let maybe_create_pbd rpc session_id sr device_config me = (fun self -> Client.PBD.get_host ~rpc ~session_id ~self = me) pbds in - (* Check not more than 1 pbd in the database *) - let pbds = - if List.length pbds > 1 then ( - (* shouldn't happen... delete all but first pbd to make db consistent again *) - List.iter - (fun pbd -> Client.PBD.destroy ~rpc ~session_id ~self:pbd) - (List.tl pbds) ; - [List.hd pbds] - ) else - pbds - in - if pbds = [] (* If there's no PBD, create it *) then + let create () : [`PBD] Ref.t = Client.PBD.create ~rpc ~session_id ~host:me ~sR:sr ~device_config ~other_config:[] - else - List.hd pbds - -(* Otherwise, return the current one *) + in + (* Ensure there's a single PBD *) + match pbds with + | [] -> + ignore (create ()) + | [_] -> + () + | _ :: pbds -> + (* shouldn't happen... 
delete all but first pbd to make db consistent + again *) + List.iter (fun pbd -> Client.PBD.destroy ~rpc ~session_id ~self:pbd) pbds let maybe_remove_tools_sr rpc session_id __context = let srs = Db.SR.get_all ~__context in @@ -153,17 +149,13 @@ let initialise_storage (me : API.ref_host) rpc session_id __context : unit = List.filter (fun (_, pbd_rec) -> pbd_rec.API.pBD_host = master) pbds in let maybe_create_pbd_for_shared_sr s = - let _, mpbd_rec = - List.find (fun (_, pbdr) -> pbdr.API.pBD_SR = s) master_pbds - in - let master_devconf = mpbd_rec.API.pBD_device_config in - let my_devconf = List.remove_assoc "SRmaster" master_devconf in - (* this should never be used *) - maybe_create_pbd rpc session_id s my_devconf me + List.find_opt (fun (_, pbdr) -> pbdr.API.pBD_SR = s) master_pbds + |> Option.iter @@ fun (_, mpbd_rec) -> + let master_devconf = mpbd_rec.API.pBD_device_config in + let my_devconf = List.remove_assoc "SRmaster" master_devconf in + try maybe_create_pbd rpc session_id s my_devconf me with _ -> () in - List.iter - (fun s -> try ignore (maybe_create_pbd_for_shared_sr s) with _ -> ()) - shared_sr_refs + List.iter maybe_create_pbd_for_shared_sr shared_sr_refs in let other_config = try @@ -173,9 +165,8 @@ let initialise_storage (me : API.ref_host) rpc session_id __context : unit = in if not - (List.mem_assoc Xapi_globs.sync_create_pbds other_config - && List.assoc Xapi_globs.sync_create_pbds other_config - = Xapi_globs.sync_switch_off + (List.assoc_opt Xapi_globs.sync_create_pbds other_config + = Some Xapi_globs.sync_switch_off ) then ( debug "Creating PBDs for shared SRs" ; diff --git a/ocaml/xapi/localdb.ml b/ocaml/xapi/localdb.ml index 3382c42e32a..7658e58523b 100644 --- a/ocaml/xapi/localdb.ml +++ b/ocaml/xapi/localdb.ml @@ -64,17 +64,27 @@ exception Missing_key of string let m = Mutex.create () let get (key : string) = + let __FUN = __FUNCTION__ in + let ( let* ) = Option.bind in with_lock m (fun () -> - assert_loaded () ; - match Hashtbl.find_opt db key with - | Some x -> - x - | None -> - raise (Missing_key key) + let* () = + try assert_loaded () ; Some () + with e -> + warn "%s: unexpected error, ignoring it: %s" __FUN + (Printexc.to_string e) ; + None + in + Hashtbl.find_opt db key ) -let get_with_default (key : string) (default : string) = - try get key with Missing_key _ -> default +let get_exn key = + match get key with Some x -> x | None -> raise (Missing_key key) + +let get_of_string of_string key = Option.bind (get key) of_string + +let get_bool key = get_of_string bool_of_string_opt key + +let get_int key = get_of_string int_of_string_opt key (* Returns true if a change was made and should be flushed *) let put_one (key : string) (v : string) = diff --git a/ocaml/xapi/localdb.mli b/ocaml/xapi/localdb.mli index 3608241fc10..ae087aaa2e3 100644 --- a/ocaml/xapi/localdb.mli +++ b/ocaml/xapi/localdb.mli @@ -18,12 +18,24 @@ (** Thrown when a particular named key could not be found. *) exception Missing_key of string -val get : string -> string +val get : string -> string option (** Retrieves a value *) -val get_with_default : string -> string -> string -(** [get_with_default key default] returns the value associated with [key], - or [default] if the key is missing. 
*) +val get_exn : string -> string +(** Retrieves the value for the key, raises Missing_key when the key is not + present *) + +val get_bool : string -> bool option +(** Retrieves the value for the key, returns a value when it's found and is a + valid boolean, otherwise is [None] *) + +val get_int : string -> int option +(** Retrieves the value for the key, returns a value when it's found and is a + valid int, otherwise is [None] *) + +val get_of_string : (string -> 'a option) -> string -> 'a option +(** [get_of_string of_string key] retrieves the value for [key], and if it + exists, processes it with [of_string], otherwise it's [None] *) val put : string -> string -> unit (** Inserts a value into the database, only returns when the insertion has diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index b1f6a2875ab..24ec680d3e1 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -232,7 +232,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> let config_repo config = - if List.length config <> 0 then (* Set params to yum/dnf *) + if config <> [] then (* Set params to yum/dnf *) let Pkg_mgr.{cmd; params} = Pkgs.config_repo ~repo_name ~config in ignore (Helpers.call_script ~log_output:Helpers.On_failure cmd params) diff --git a/ocaml/xapi/system_domains.ml b/ocaml/xapi/system_domains.ml index 0453c205566..c610424aeef 100644 --- a/ocaml/xapi/system_domains.ml +++ b/ocaml/xapi/system_domains.ml @@ -148,36 +148,6 @@ let is_in_use ~__context ~self = else false -(* [wait_for ?timeout f] returns true if [f()] (called at 1Hz) returns true within - the [timeout] period and false otherwise *) -let wait_for ?(timeout = 120.) f = - let start = Unix.gettimeofday () in - let finished = ref false in - let success = ref false in - while not !finished do - let remaining = timeout -. (Unix.gettimeofday () -. start) in - if remaining < 0. then - finished := true - else - try - if f () then ( - success := true ; - finished := true - ) else - Thread.delay 1. - with _ -> Thread.delay 1. 
- done ; - !success - -let pingable ip () = - try - let (_ : string * string) = - Forkhelpers.execute_command_get_output "/bin/ping" - ["-c"; "1"; "-w"; "1"; ip] - in - true - with _ -> false - let queryable ~__context transport () = let open Xmlrpc_client in let tracing = Context.set_client_span __context in @@ -197,47 +167,6 @@ let queryable ~__context transport () = (Printexc.to_string e) ; false -let ip_of ~__context driver = - (* Find the VIF on the Host internal management network *) - let vifs = Db.VM.get_VIFs ~__context ~self:driver in - let hin = Helpers.get_host_internal_management_network ~__context in - let ip = - let vif = - try - List.find - (fun vif -> Db.VIF.get_network ~__context ~self:vif = hin) - vifs - with Not_found -> - failwith - (Printf.sprintf - "driver domain %s has no VIF on host internal management network" - (Ref.string_of driver) - ) - in - match Xapi_udhcpd.get_ip ~__context vif with - | Some (a, b, c, d) -> - Printf.sprintf "%d.%d.%d.%d" a b c d - | None -> - failwith - (Printf.sprintf - "driver domain %s has no IP on the host internal management \ - network" - (Ref.string_of driver) - ) - in - info "driver domain uuid:%s ip:%s" (Db.VM.get_uuid ~__context ~self:driver) ip ; - if not (wait_for (pingable ip)) then - failwith - (Printf.sprintf "driver domain %s is not responding to IP ping" - (Ref.string_of driver) - ) ; - if not (wait_for (queryable ~__context (Xmlrpc_client.TCP (ip, 80)))) then - failwith - (Printf.sprintf "driver domain %s is not responding to XMLRPC query" - (Ref.string_of driver) - ) ; - ip - type service = {uuid: string; ty: string; instance: string; url: string} [@@deriving rpc] diff --git a/ocaml/xapi/system_domains.mli b/ocaml/xapi/system_domains.mli index 36881ad865a..33df12a6f54 100644 --- a/ocaml/xapi/system_domains.mli +++ b/ocaml/xapi/system_domains.mli @@ -51,9 +51,6 @@ val is_in_use : __context:Context.t -> self:API.ref_VM -> bool val queryable : __context:Context.t -> Xmlrpc_client.transport -> unit -> bool (** [queryable ip port ()] returns true if [ip]:[port] responsds to an XMLRPC query *) -val ip_of : __context:Context.t -> API.ref_VM -> string -(** [ip_of __context vm] returns the IP of the given VM on the internal management network *) - (** One of many service running in a driver domain *) type service = {uuid: string; ty: string; instance: string; url: string} diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 09ce5ba9550..ab04cde143d 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -288,12 +288,9 @@ let synchronize_certificates_with_coordinator ~__context = (* Make sure the local database can be read *) let init_local_database () = - ( try - let (_ : string) = Localdb.get Constants.ha_armed in - () - with Localdb.Missing_key _ -> - Localdb.put Constants.ha_armed "false" ; - debug "%s = 'false' (by default)" Constants.ha_armed + if Option.is_none (Localdb.get_bool Constants.ha_armed) then ( + Localdb.put Constants.ha_armed "false" ; + debug "%s = 'false' (by default)" Constants.ha_armed ) ; (* Add the local session check hook *) Session_check.check_local_session_hook := @@ -519,13 +516,14 @@ let start_ha () = (** Enable and load the redo log if we are the master, the local-DB flag is set * and HA is disabled *) let start_redo_log () = + let redo_log_enabled () = + Localdb.get_bool Constants.redo_log_enabled |> Option.value ~default:false + in + let ha_armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in try - if - Pool_role.is_master () - && bool_of_string - 
(Localdb.get_with_default Constants.redo_log_enabled "false") - && not (bool_of_string (Localdb.get Constants.ha_armed)) - then ( + if Pool_role.is_master () && redo_log_enabled () && not (ha_armed ()) then ( debug "Redo log was enabled when shutting down, so restarting it" ; Static_vdis.reattempt_on_boot_attach () ; (* enable the use of the redo log *) @@ -610,7 +608,7 @@ let resynchronise_ha_state () = let pool = Helpers.get_pool ~__context in let pool_ha_enabled = Db.Pool.get_ha_enabled ~__context ~self:pool in let local_ha_enabled = - bool_of_string (Localdb.get Constants.ha_armed) + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in match (local_ha_enabled, pool_ha_enabled) with | true, true -> diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index fcbc9174e9d..e7853ef53f2 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -858,7 +858,7 @@ let migration_https_only = ref true let cluster_stack_root = ref "/usr/libexec/xapi/cluster-stack" -let cluster_stack_default = ref "xhad" +let cluster_stack_default = ref Constants.default_cluster_stack let xen_cmdline_path = ref "/opt/xensource/libexec/xen-cmdline" diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index 4d7714a8b83..1b62a99d250 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -42,6 +42,10 @@ let ha_redo_log = (*********************************************************************************************) (* Interface with the low-level HA subsystem *) +exception Address_not_found of string + +exception Uuid_not_found of string + (** Returns the current live set info *) let query_liveset () = let txt = call_script ~log_output:On_failure ha_query_liveset [] in @@ -76,8 +80,11 @@ let propose_master () = (** Returns true if local failover decisions have not been disabled on this node *) let local_failover_decisions_are_ok () = - try not (bool_of_string (Localdb.get Constants.ha_disable_failover_decisions)) - with _ -> true + let disabled = + Localdb.get_bool Constants.ha_disable_failover_decisions + |> Option.value ~default:false + in + not disabled (** Since the liveset info doesn't include the host IP address, we persist these ourselves *) let write_uuid_to_ip_mapping ~__context = @@ -91,36 +98,40 @@ let write_uuid_to_ip_mapping ~__context = (** Since the liveset info doesn't include the host IP address, we persist these ourselves *) let get_uuid_to_ip_mapping () = - let v = Localdb.get Constants.ha_peers in - String_unmarshall_helper.map (fun x -> x) (fun x -> x) v + match Localdb.get Constants.ha_peers with + | Some peers -> + String_unmarshall_helper.map (fun k -> k) (fun v -> v) peers + | None -> + [] (** Without using the Pool's database, returns the IP address of a particular host named by UUID. *) let address_of_host_uuid uuid = let table = get_uuid_to_ip_mapping () in - if not (List.mem_assoc uuid table) then ( - error "Failed to find the IP address of host UUID %s" uuid ; - raise Not_found - ) else - List.assoc uuid table + let uuid_not_found = Uuid_not_found uuid in + List.assoc_opt uuid table |> Option.to_result ~none:uuid_not_found (** Without using the Pool's database, returns the UUID of a particular host named by heartbeat IP address. 
This is only necesary because the liveset info doesn't include the host IP address *) let uuid_of_host_address address = let table = List.map (fun (k, v) -> (v, k)) (get_uuid_to_ip_mapping ()) in - match List.assoc_opt address table with - | None -> - error "Failed to find the UUID address of host with address %s" address ; - raise Not_found - | Some uuid_str -> ( - match Uuidx.of_string uuid_str with - | None -> - error "Failed parse UUID of host with address %s" address ; - raise (Invalid_argument "Invalid UUID") - | Some uuid -> - uuid - ) + let invalid_uuid = Invalid_argument "Invalid UUID" in + let address_not_found = Address_not_found address in + let to_uuid str = + Uuidx.of_string str |> Option.to_result ~none:invalid_uuid + in + List.assoc_opt address table + |> Option.to_result ~none:address_not_found + |> Fun.flip Result.bind to_uuid + +let ok_or_raise map_error = function Ok v -> v | Error exn -> map_error exn + +let master_address_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's IP: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) (** Called in two circumstances: 1. When I started up I thought I was the master but my proposal was rejected by the @@ -139,7 +150,9 @@ let on_master_failure () = done in let become_slave_of uuid = - let address = address_of_host_uuid uuid in + let address = + address_of_host_uuid uuid |> ok_or_raise (master_address_exn __FUNCTION__) + in info "This node will become the slave of host %s (%s)" uuid address ; Xapi_pool_transition.become_another_masters_slave address ; (* XXX CA-16388: prevent blocking *) @@ -164,19 +177,17 @@ let on_master_failure () = "ha_can_not_be_master_on_next_boot set: I cannot be master; looking \ for another master" ; let liveset = query_liveset () in + let open Xha_interface.LiveSetInformation in match Hashtbl.fold (fun uuid host acc -> - if - host.Xha_interface.LiveSetInformation.Host.master - && host.Xha_interface.LiveSetInformation.Host.liveness - (* CP-25481: a dead host may still have the master lock *) - then + (* CP-25481: a dead host may still have the master lock *) + if host.Host.master && host.Host.liveness then uuid :: acc else acc ) - liveset.Xha_interface.LiveSetInformation.hosts [] + liveset.hosts [] with | [] -> info "no other master exists yet; waiting 5 seconds and retrying" ; @@ -191,6 +202,18 @@ let on_master_failure () = ) done +let master_uuid_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's UUID: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) + +let master_not_in_liveset_exn __FUN e = + let exn = Printexc.to_string e in + let msg = Printf.sprintf "unable to gather the coordinator's info: %s" exn in + error "%s: %s" __FUN msg ; + raise Api_errors.(Server_error (internal_error, [msg])) + module Timeouts = struct type t = { heart_beat_interval: int @@ -303,7 +326,7 @@ module Monitor = struct let statefiles = Xha_statefile.list_existing_statefiles () in debug "HA background thread starting" ; (* Grab the base timeout value so we can cook the reported latencies *) - let base_t = int_of_string (Localdb.get Constants.ha_base_t) in + let base_t = int_of_string (Localdb.get_exn Constants.ha_base_t) in let timeouts = Timeouts.derive base_t in (* Set up our per-host alert triggers *) let localhost_uuid = Helpers.get_localhost_uuid () in @@ -457,16 +480,20 @@ module Monitor = struct (* 
WARNING: must not touch the database or perform blocking I/O *) let process_liveset_on_slave liveset = let address = Pool_role.get_master_address () in - let master_uuid = uuid_of_host_address address in + let master_uuid = + uuid_of_host_address address + |> ok_or_raise (master_uuid_exn __FUNCTION__) + in + let open Xha_interface.LiveSetInformation in + let uuid_not_found = + Uuid_not_found (Uuidx.to_string master_uuid) + in let master_info = - Hashtbl.find liveset.Xha_interface.LiveSetInformation.hosts - master_uuid + Hashtbl.find_opt liveset.hosts master_uuid + |> Option.to_result ~none:uuid_not_found + |> ok_or_raise (master_not_in_liveset_exn __FUNCTION__) in - if - true - && master_info.Xha_interface.LiveSetInformation.Host.liveness - && master_info.Xha_interface.LiveSetInformation.Host.master - then + if master_info.Host.liveness && master_info.Host.master then debug "The node we think is the master is still alive and marked \ as master; this is OK" @@ -501,8 +528,6 @@ module Monitor = struct ) in - (* let planned_for = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in *) - (* First consider whether VM failover actions need to happen. Convert the liveset into a list of Host references used by the VM failover code *) let liveset_uuids = @@ -629,11 +654,9 @@ module Monitor = struct (* and yet has no statefile access *) in let all_live_nodes_lost_statefile = - List.fold_left ( && ) true - (List.map - (fun (_, xha_host) -> relying_on_rule_2 xha_host) - host_host_table - ) + List.for_all + (fun (_, xha_host) -> relying_on_rule_2 xha_host) + host_host_table in warning_all_live_nodes_lost_statefile all_live_nodes_lost_statefile ; @@ -969,7 +992,10 @@ let redo_log_ha_enabled_at_startup () = let update_ha_firewalld_service status = (* Only xha needs to enable firewalld service. Other HA cluster stacks don't need. 
*) - if Localdb.get Constants.ha_cluster_stack = !Xapi_globs.cluster_stack_default + if + Localdb.get Constants.ha_cluster_stack + |> Option.value ~default:!Xapi_globs.cluster_stack_default + = Constants.Ha_cluster_stack.(to_string Xhad) then let module Fw = ( val Firewall.firewall_provider !Xapi_globs.firewall_backend @@ -984,8 +1010,10 @@ let ha_start_daemon () = () let on_server_restart () = - let armed = bool_of_string (Localdb.get Constants.ha_armed) in - if armed then ( + let armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in + if armed () then ( debug "HA is supposed to be armed" ; (* Make sure daemons are up *) let finished = ref false in @@ -993,10 +1021,7 @@ let on_server_restart () = XXX we might need some kind of user-override *) while not !finished do (* If someone has called Host.emergency_ha_disable in the background then we notice the change here *) - if - not - (try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false) - then ( + if not (armed ()) then ( warn "ha_start_daemon aborted because someone has called \ Host.emergency_ha_disable" ; @@ -1147,7 +1172,7 @@ let ha_stop_daemon __context _localhost = let emergency_ha_disable __context soft = let ha_armed = - try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in if not ha_armed then if soft then @@ -1385,6 +1410,7 @@ let preconfigure_host __context localhost statevdis metadata_vdi generation = Localdb.put Constants.ha_base_t (string_of_int base_t) let join_liveset __context host = + let __FUN = __FUNCTION__ in info "Host.ha_join_liveset host = %s" (Ref.string_of host) ; ha_start_daemon () ; Localdb.put Constants.ha_disable_failover_decisions "false" ; @@ -1402,7 +1428,10 @@ let join_liveset __context host = (* If this host is a slave then we must wait to confirm that the master manages to assert itself, otherwise our monitoring thread might attempt a hostile takeover *) let master_address = Pool_role.get_master_address () in - let master_uuid = uuid_of_host_address master_address in + let master_uuid = + uuid_of_host_address master_address + |> ok_or_raise (master_uuid_exn __FUN) + in let master_found = ref false in while not !master_found do (* It takes a non-trivial amount of time for the master to assert itself: we might @@ -1410,30 +1439,24 @@ let join_liveset __context host = should wait. *) Thread.delay 5. 
; let liveset = query_liveset () in - debug "Liveset: %s" - (Xha_interface.LiveSetInformation.to_summary_string liveset) ; - if - liveset.Xha_interface.LiveSetInformation.status - = Xha_interface.LiveSetInformation.Status.Online - then + let open Xha_interface.LiveSetInformation in + debug "Liveset: %s" (to_summary_string liveset) ; + if liveset.status = Status.Online then (* 'master' is the node we believe should become the xHA-level master initially *) let master = - Hashtbl.find liveset.Xha_interface.LiveSetInformation.hosts - master_uuid + Hashtbl.find_opt liveset.hosts master_uuid + |> Option.to_result ~none:Not_found + |> ok_or_raise (master_not_in_liveset_exn __FUN) in - if master.Xha_interface.LiveSetInformation.Host.master then ( + if master.Host.master then ( info "existing master has successfully asserted itself" ; master_found := true (* loop will terminate *) ) else if false - || (not master.Xha_interface.LiveSetInformation.Host.liveness) - || master - .Xha_interface.LiveSetInformation.Host.state_file_corrupted - || (not - master - .Xha_interface.LiveSetInformation.Host.state_file_access - ) - || master.Xha_interface.LiveSetInformation.Host.excluded + || (not master.Host.liveness) + || master.Host.state_file_corrupted + || (not master.Host.state_file_access) + || master.Host.excluded then ( error "Existing master has failed during HA enable process" ; failwith "Existing master failed during HA enable process" @@ -1869,10 +1892,7 @@ let enable __context heartbeat_srs configuration = with _ -> false in if not alive then - raise - (Api_errors.Server_error - (Api_errors.host_offline, [Ref.string_of host]) - ) + raise Api_errors.(Server_error (host_offline, [Ref.string_of host])) ) (Db.Host.get_all ~__context) ; let pool = Helpers.get_pool ~__context in @@ -1897,20 +1917,23 @@ let enable __context heartbeat_srs configuration = else heartbeat_srs in - if possible_srs = [] then - raise (Api_errors.Server_error (Api_errors.cannot_create_state_file, [])) ; - (* For the moment we'll create a state file in one compatible SR since the xHA component only handles one *) - let srs = [List.hd possible_srs] in + (* For the moment we'll create a state file in one compatible SR since the + xHA component only handles one *) + let sr = + match possible_srs with + | [] -> + raise Api_errors.(Server_error (cannot_create_state_file, [])) + | sr :: _ -> + sr + in List.iter (fun sr -> let vdi = Xha_statefile.find_or_create ~__context ~sr ~cluster_stack in statefile_vdis := vdi :: !statefile_vdis ) - srs ; + [sr] ; (* For storing the database, assume there is only one SR *) - let database_vdi = - Xha_metadata_vdi.find_or_create ~__context ~sr:(List.hd srs) - in + let database_vdi = Xha_metadata_vdi.find_or_create ~__context ~sr in database_vdis := database_vdi :: !database_vdis ; (* Record the statefile UUIDs in the Pool.ha_statefile set *) Db.Pool.set_ha_statefiles ~__context ~self:pool @@ -1991,14 +2014,16 @@ let enable __context heartbeat_srs configuration = (ExnHelper.string_of_exn e) ) errors ; - if errors <> [] then ( - (* Perform a disable since the pool HA state isn't consistent *) - error "Attempting to disable HA pool-wide" ; - Helpers.log_exn_continue - "Disabling HA after a failure joining all hosts to the liveset" - disable_internal __context ; - raise (snd (List.hd errors)) - ) ; + List.iter + (fun (_, exn) -> + (* Perform a disable since the pool HA state isn't consistent *) + error "Attempting to disable HA pool-wide" ; + Helpers.log_exn_continue + "Disabling HA after a failure joining 
all hosts to the liveset" + disable_internal __context ; + raise exn + ) + errors ; (* We have to set the HA enabled flag before forcing a database resynchronisation *) Db.Pool.set_ha_enabled ~__context ~self:pool ~value:true ; debug "HA enabled" ; @@ -2036,13 +2061,16 @@ let enable __context heartbeat_srs configuration = (ExnHelper.string_of_exn e) ) errors ; - if errors <> [] then ( - (* Perform a disable since the pool HA state isn't consistent *) - error "Attempting to disable HA pool-wide" ; - Helpers.log_exn_continue "Disabling HA after a failure during enable" - disable_internal __context ; - raise (snd (List.hd errors)) - ) ; + List.iter + (fun (_, exn) -> + (* Perform a disable since the pool HA state isn't consistent *) + error "Attempting to disable HA pool-wide" ; + Helpers.log_exn_continue + "Disabling HA after a failure during enable" disable_internal + __context ; + raise exn + ) + errors ; (* Update the allowed_operations on the HA volumes to prevent people thinking they can mess with them *) List.iter (fun vdi -> Xapi_vdi.update_allowed_operations ~__context ~self:vdi) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 90062c7785e..1699c698965 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -79,8 +79,8 @@ let assert_safe_to_reenable ~__context ~self ~user_request = Repository_helpers.assert_no_host_pending_mandatory_guidance ~__context ~host:self ; let host_disabled_until_reboot = - try bool_of_string (Localdb.get Constants.host_disabled_until_reboot) - with _ -> false + Localdb.get_bool Constants.host_disabled_until_reboot + |> Option.value ~default:false in if host_disabled_until_reboot then raise @@ -88,7 +88,7 @@ let assert_safe_to_reenable ~__context ~self ~user_request = (Api_errors.host_disabled_until_reboot, [Ref.string_of self]) ) ; let host_auto_enable = - try bool_of_string (Localdb.get Constants.host_auto_enable) with _ -> true + Localdb.get_bool Constants.host_auto_enable |> Option.value ~default:true in if (not host_auto_enable) && not user_request then raise @@ -3417,9 +3417,15 @@ let update_firewalld_service_status ~__context = | Xenha -> (* Only xha needs to enable firewalld service. Other HA cluster stacks don't need. *) - bool_of_string (Localdb.get Constants.ha_armed) - && Localdb.get Constants.ha_cluster_stack - = !Xapi_globs.cluster_stack_default + let is_armed () = + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false + in + let uses_xhad () = + Localdb.get Constants.ha_cluster_stack + |> Option.value ~default:!Xapi_globs.cluster_stack_default + = Constants.Ha_cluster_stack.(to_string Xhad) + in + is_armed () && uses_xhad () in List.iter (fun s -> if is_enabled s then enable_firewalld_service s) diff --git a/ocaml/xapi/xapi_host_crashdump.ml b/ocaml/xapi/xapi_host_crashdump.ml index 02e8303d777..b7e9eedd74e 100644 --- a/ocaml/xapi/xapi_host_crashdump.ml +++ b/ocaml/xapi/xapi_host_crashdump.ml @@ -71,8 +71,8 @@ let resynchronise ~__context ~host = let gone_away = Listext.List.set_difference db_filenames real_filenames and arrived = Listext.List.set_difference real_filenames db_filenames in let was_shutdown_cleanly = - try bool_of_string (Localdb.get Constants.host_restarted_cleanly) - with _ -> false + Localdb.get_bool Constants.host_restarted_cleanly + |> Option.value ~default:false in Localdb.put Constants.host_restarted_cleanly "false" ; (* If HA is enabled AND no crashdump appeared AND we weren't shutdown cleanly then assume it was a fence. 
*) diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index da837f9329a..de1805082b8 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -380,7 +380,7 @@ let consider_enabling_host_nolock ~__context = Disabled hosts are excluded from the HA planning calculations. Otherwise a host may boot, fail to plug in a PBD and cause all protected VMs to suddenly become non-agile. *) let ha_enabled = - try bool_of_string (Localdb.get Constants.ha_armed) with _ -> false + Localdb.get_bool Constants.ha_armed |> Option.value ~default:false in let localhost = Helpers.get_localhost ~__context in let pbds = Db.Host.get_PBDs ~__context ~self:localhost in @@ -423,8 +423,7 @@ let consider_enabling_host_nolock ~__context = f () in let host_auto_enable = - try bool_of_string (Localdb.get Constants.host_auto_enable) - with _ -> true + Localdb.get_bool Constants.host_auto_enable |> Option.value ~default:true in if !Xapi_globs.on_system_boot then ( debug "Host.enabled: system has just restarted" ; @@ -451,8 +450,8 @@ let consider_enabling_host_nolock ~__context = until manually re-enabled by the user" ) ) else if - try bool_of_string (Localdb.get Constants.host_disabled_until_reboot) - with _ -> false + Localdb.get_bool Constants.host_disabled_until_reboot + |> Option.value ~default:false then debug "Host.enabled: system not just rebooted but host_disabled_until_reboot \ diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index ccbd779d462..cbb39e28adb 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1880,15 +1880,14 @@ let exchange_ca_certificates_on_join ~__context ~import ~export : in Cert_distrib.exchange_ca_certificates_with_joiner ~__context ~import ~export -(* Assume that db backed up from master will be there and ready to go... 
*) let emergency_transition_to_master ~__context = - if Localdb.get Constants.ha_armed = "true" then - raise (Api_errors.Server_error (Api_errors.ha_is_enabled, [])) ; + if Localdb.get_bool Constants.ha_armed |> Option.value ~default:false then + raise Api_errors.(Server_error (ha_is_enabled, [])) ; Xapi_pool_transition.become_master () let emergency_reset_master ~__context ~master_address = - if Localdb.get Constants.ha_armed = "true" then - raise (Api_errors.Server_error (Api_errors.ha_is_enabled, [])) ; + if Localdb.get_bool Constants.ha_armed |> Option.value ~default:false then + raise Api_errors.(Server_error (ha_is_enabled, [])) ; let master_address = Helpers.gethostbyname master_address in Xapi_pool_transition.become_another_masters_slave master_address diff --git a/ocaml/xapi/xapi_pool_transition.ml b/ocaml/xapi/xapi_pool_transition.ml index 8f6a315f591..a35b736c3f7 100644 --- a/ocaml/xapi/xapi_pool_transition.ml +++ b/ocaml/xapi/xapi_pool_transition.ml @@ -62,7 +62,7 @@ let run_external_scripts becoming_master = order in let already_run = - try bool_of_string (Localdb.get Constants.master_scripts) with _ -> false + Localdb.get_bool Constants.master_scripts |> Option.value ~default:false in (* Only do anything if we're switching mode *) if already_run <> becoming_master then ( @@ -228,8 +228,8 @@ let become_another_masters_slave master_address = (** If we just transitioned slave -> master (as indicated by the localdb flag) then generate a single alert *) let consider_sending_alert __context () = if - try bool_of_string (Localdb.get Constants.this_node_just_became_master) - with _ -> false + Localdb.get_bool Constants.this_node_just_became_master + |> Option.value ~default:false then let obj_uuid = Helpers.get_localhost_uuid () in let name, priority = Api_messages.pool_master_transition in diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index b3d3ec8488d..14290421fb4 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -507,9 +507,7 @@ let check_operation_error ~__context ~ref = (* if other operations are in progress, check that the new operation is allowed concurrently with them. *) let current_error = check current_error (fun () -> - if - List.length current_ops <> 0 - && not (is_allowed_concurrently ~op ~current_ops) + if current_ops <> [] && not (is_allowed_concurrently ~op ~current_ops) then report_concurrent_operations_error ~current_ops ~ref_str else diff --git a/ocaml/xapi/xha_interface.ml b/ocaml/xapi/xha_interface.ml index 0935e06619d..82ac381519d 100644 --- a/ocaml/xapi/xha_interface.ml +++ b/ocaml/xapi/xha_interface.ml @@ -32,8 +32,7 @@ let xml_element_has_name name element = (** Returns the first element with the specified name from the given element list. *) let first_xml_element_with_name elements name = - try Some (List.find (xml_element_has_name name) elements) - with Not_found -> None + List.find_opt (xml_element_has_name name) elements (** Parses an XML element of the form "value". 
Returns a (name, value) string pair, where the arguments diff --git a/ocaml/xapi/xha_scripts.ml b/ocaml/xapi/xha_scripts.ml index c8f87e412c1..669664112a5 100644 --- a/ocaml/xapi/xha_scripts.ml +++ b/ocaml/xapi/xha_scripts.ml @@ -17,7 +17,7 @@ module D = Debug.Make (struct let name = "xapi_ha" end) open D let ha_dir () = - let stack = Localdb.get Constants.ha_cluster_stack in + let stack = Localdb.get_exn Constants.ha_cluster_stack in Filename.concat !Xapi_globs.cluster_stack_root stack let ha_set_pool_state = "ha_set_pool_state" diff --git a/quality-gate.sh b/quality-gate.sh index 84919c51a1c..b2345f75ef7 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=274 + N=268 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -110,7 +110,7 @@ unixgetenv () { } hashtblfind () { - N=35 + N=33 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) @@ -130,7 +130,9 @@ unnecessary-length () { UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*=+\s*List.length"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<>\s*0"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<>\s*List.length"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<\s*1"))) UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "1\s*>\s*List.length"))) if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then
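

Note (illustration only, not part of the patch): the hunks above replace exception-based Localdb lookups (Missing_key plus bool_of_string) with an option-returning API (get, get_exn, get_bool, get_of_string) and move the HA cluster-stack strings behind a small variant module in Constants. The standalone sketch below mirrors that caller pattern using only the OCaml standard library; the Hashtbl stand-in and the lookup/Stack names are invented for this example and do not exist in the xapi tree.

    (* Illustrative sketch of the option-based lookup style introduced by the
       patch. [db], [lookup] and [Stack] are local stand-ins, not xapi code. *)
    module Stack = struct
      type t = Xhad | Corosync

      let to_string = function Xhad -> "xhad" | Corosync -> "corosync"

      let of_string = function
        | "xhad" -> Some Xhad
        | "corosync" -> Some Corosync
        | _ -> None
    end

    let db : (string, string) Hashtbl.t = Hashtbl.create 16

    (* Like the patched Localdb.get: return [None] instead of raising. *)
    let lookup key = Hashtbl.find_opt db key

    (* Like Localdb.get_of_string: compose the lookup with a parser. *)
    let lookup_of_string of_string key = Option.bind (lookup key) of_string

    let lookup_bool key = lookup_of_string bool_of_string_opt key

    let () =
      Hashtbl.replace db "ha_armed" "true" ;
      Hashtbl.replace db "ha_cluster_stack" "xhad" ;
      (* Callers now choose an explicit default rather than catching
         Missing_key or letting bool_of_string raise. *)
      let armed = lookup_bool "ha_armed" |> Option.value ~default:false in
      let stack =
        lookup_of_string Stack.of_string "ha_cluster_stack"
        |> Option.value ~default:Stack.Xhad
      in
      Printf.printf "armed=%b stack=%s\n" armed (Stack.to_string stack)

This is only a sketch of the calling convention under the assumptions stated above; in the patch itself the defaults come from Xapi_globs.cluster_stack_default and the various ~default:false / ~default:true call sites shown in the hunks.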