Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CA-366309: ignore HA when checking update readiness #4714

Merged
merged 1 commit into from
May 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions ocaml/xapi/xapi_host.ml
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ let string_of_per_vm_plan p =

(** Return a table mapping VMs to 'per_vm_plan' types indicating either a target
Host or a reason why the VM cannot be migrated. *)
let compute_evacuation_plan_no_wlb ~__context ~host =
let compute_evacuation_plan_no_wlb ~__context ~host ?(ignore_ha = false) () =
let all_hosts = Db.Host.get_all ~__context in
let enabled_hosts =
List.filter (fun self -> Db.Host.get_enabled ~__context ~self) all_hosts
Expand Down Expand Up @@ -308,7 +308,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
planner's PoV) to the result obtained following a host failure and VM restart. *)
let pool = Helpers.get_pool ~__context in
let protected_vms, unprotected_vms =
if Db.Pool.get_ha_enabled ~__context ~self:pool then
if Db.Pool.get_ha_enabled ~__context ~self:pool && not ignore_ha then
List.partition
(fun (_, record) ->
Helpers.vm_should_always_run record.API.vM_ha_always_run
Expand Down Expand Up @@ -393,7 +393,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
(* Old Miami style function with the strange error encoding *)
let assert_can_evacuate ~__context ~host =
(* call no_wlb function as we only care about errors, and wlb only provides recs for moveable vms *)
let plans = compute_evacuation_plan_no_wlb ~__context ~host in
let plans = compute_evacuation_plan_no_wlb ~__context ~host () in
let errors =
Hashtbl.fold
(fun _ plan acc ->
Expand All @@ -411,9 +411,10 @@ let assert_can_evacuate ~__context ~host =
(Api_errors.cannot_evacuate_host, [String.concat "|" errors])
)

(* New Orlando style function which returns a Map *)
let get_vms_which_prevent_evacuation ~__context ~self =
let plans = compute_evacuation_plan_no_wlb ~__context ~host:self in
let get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha =
let plans =
compute_evacuation_plan_no_wlb ~__context ~host:self ~ignore_ha ()
in
Hashtbl.fold
(fun vm plan acc ->
match plan with
Expand All @@ -424,6 +425,10 @@ let get_vms_which_prevent_evacuation ~__context ~self =
)
plans []

(* New Orlando style function which returns a Map *)
let get_vms_which_prevent_evacuation ~__context ~self =
get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha:false

let compute_evacuation_plan_wlb ~__context ~self =
(* We treat xapi as primary when it comes to "hard" errors, i.e. those that aren't down to memory constraints. These are things like
VM_REQUIRES_SR or VM_LACKS_FEATURE_SUSPEND.
Expand All @@ -438,7 +443,7 @@ let compute_evacuation_plan_wlb ~__context ~self =
right thing to do because WLB doesn't know about all the HA corner cases (for example), but xapi does.
If there are any VMs left over, record them as HOST_NOT_ENOUGH_FREE_MEMORY, because we assume that WLB thinks they don't fit.
*)
let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self in
let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self () in
let vm_recoms =
get_evacuation_recoms ~__context ~uuid:(Db.Host.get_uuid ~__context ~self)
in
Expand Down Expand Up @@ -538,7 +543,7 @@ let compute_evacuation_plan ~__context ~host =
debug
"Using wlb recommendations for choosing a host has been disabled or wlb \
is not available. Using original algorithm" ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()
) else
try
debug "Using WLB recommendations for host evacuation." ;
Expand All @@ -565,12 +570,12 @@ let compute_evacuation_plan ~__context ~host =
)
with _ -> ()
) ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()
| _ ->
debug
"Encountered an unknown error when using wlb for choosing host. \
Using original algorithm" ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()

let evacuate ~__context ~host ~network =
let task = Context.get_task_id __context in
Expand Down
9 changes: 9 additions & 0 deletions ocaml/xapi/xapi_host.mli
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ val assert_can_evacuate : __context:Context.t -> host:API.ref_host -> unit
val get_vms_which_prevent_evacuation :
__context:Context.t -> self:API.ref_host -> (API.ref_VM * string list) list

(* Similar to the (API) call `host.get_vms_which_prevent_evacuation`, but with an
additional `ignore_ha` argument. The makes the evacuation planner behave the
same no matter whether HA is enabled or not. *)
val get_vms_which_prevent_evacuation_internal :
__context:Context.t
-> self:API.ref_host
-> ignore_ha:bool
-> (API.ref_VM * string list) list

val evacuate :
__context:Context.t -> host:API.ref_host -> network:API.ref_network -> unit

Expand Down
3 changes: 2 additions & 1 deletion ocaml/xapi/xapi_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3440,7 +3440,8 @@ let check_update_readiness ~__context ~self:_ ~requires_reboot =
if not alive then
[[Api_errors.host_offline; Ref.string_of host]]
else if requires_reboot then
Xapi_host.get_vms_which_prevent_evacuation ~__context ~self:host
Xapi_host.get_vms_which_prevent_evacuation_internal ~__context ~self:host
~ignore_ha:true
|> List.map (fun (_, error) -> error)
else
[[]]
Expand Down