Skip to content

Commit

Permalink
CA-366309: ignore HA when checking update readiness
Browse files Browse the repository at this point in the history
In particular, the evacuation plan is computed in a different way when
HA is enabled. We want the update readiness to check things as if HA is
disabled, because before updates are applied, HA will be disabled anyway.

Signed-off-by: Rob Hoes <rob.hoes@citrix.com>
  • Loading branch information
robhoes committed May 18, 2022
1 parent 3bda7d9 commit 8bb7f7c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 11 deletions.
25 changes: 15 additions & 10 deletions ocaml/xapi/xapi_host.ml
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ let string_of_per_vm_plan p =

(** Return a table mapping VMs to 'per_vm_plan' types indicating either a target
Host or a reason why the VM cannot be migrated. *)
let compute_evacuation_plan_no_wlb ~__context ~host =
let compute_evacuation_plan_no_wlb ~__context ~host ?(ignore_ha = false) () =
let all_hosts = Db.Host.get_all ~__context in
let enabled_hosts =
List.filter (fun self -> Db.Host.get_enabled ~__context ~self) all_hosts
Expand Down Expand Up @@ -308,7 +308,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
planner's PoV) to the result obtained following a host failure and VM restart. *)
let pool = Helpers.get_pool ~__context in
let protected_vms, unprotected_vms =
if Db.Pool.get_ha_enabled ~__context ~self:pool then
if Db.Pool.get_ha_enabled ~__context ~self:pool && not ignore_ha then
List.partition
(fun (_, record) ->
Helpers.vm_should_always_run record.API.vM_ha_always_run
Expand Down Expand Up @@ -393,7 +393,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
(* Old Miami style function with the strange error encoding *)
let assert_can_evacuate ~__context ~host =
(* call no_wlb function as we only care about errors, and wlb only provides recs for moveable vms *)
let plans = compute_evacuation_plan_no_wlb ~__context ~host in
let plans = compute_evacuation_plan_no_wlb ~__context ~host () in
let errors =
Hashtbl.fold
(fun _ plan acc ->
Expand All @@ -411,9 +411,10 @@ let assert_can_evacuate ~__context ~host =
(Api_errors.cannot_evacuate_host, [String.concat "|" errors])
)

(* New Orlando style function which returns a Map *)
let get_vms_which_prevent_evacuation ~__context ~self =
let plans = compute_evacuation_plan_no_wlb ~__context ~host:self in
let get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha =
let plans =
compute_evacuation_plan_no_wlb ~__context ~host:self ~ignore_ha ()
in
Hashtbl.fold
(fun vm plan acc ->
match plan with
Expand All @@ -424,6 +425,10 @@ let get_vms_which_prevent_evacuation ~__context ~self =
)
plans []

(* New Orlando style function which returns a Map *)
let get_vms_which_prevent_evacuation ~__context ~self =
get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha:false

let compute_evacuation_plan_wlb ~__context ~self =
(* We treat xapi as primary when it comes to "hard" errors, i.e. those that aren't down to memory constraints. These are things like
VM_REQUIRES_SR or VM_LACKS_FEATURE_SUSPEND.
Expand All @@ -438,7 +443,7 @@ let compute_evacuation_plan_wlb ~__context ~self =
right thing to do because WLB doesn't know about all the HA corner cases (for example), but xapi does.
If there are any VMs left over, record them as HOST_NOT_ENOUGH_FREE_MEMORY, because we assume that WLB thinks they don't fit.
*)
let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self in
let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self () in
let vm_recoms =
get_evacuation_recoms ~__context ~uuid:(Db.Host.get_uuid ~__context ~self)
in
Expand Down Expand Up @@ -538,7 +543,7 @@ let compute_evacuation_plan ~__context ~host =
debug
"Using wlb recommendations for choosing a host has been disabled or wlb \
is not available. Using original algorithm" ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()
) else
try
debug "Using WLB recommendations for host evacuation." ;
Expand All @@ -565,12 +570,12 @@ let compute_evacuation_plan ~__context ~host =
)
with _ -> ()
) ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()
| _ ->
debug
"Encountered an unknown error when using wlb for choosing host. \
Using original algorithm" ;
compute_evacuation_plan_no_wlb ~__context ~host
compute_evacuation_plan_no_wlb ~__context ~host ()

let evacuate ~__context ~host ~network =
let task = Context.get_task_id __context in
Expand Down
9 changes: 9 additions & 0 deletions ocaml/xapi/xapi_host.mli
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ val assert_can_evacuate : __context:Context.t -> host:API.ref_host -> unit
val get_vms_which_prevent_evacuation :
__context:Context.t -> self:API.ref_host -> (API.ref_VM * string list) list

(* Similar to the (API) call `host.get_vms_which_prevent_evacuation`, but with an
additional `ignore_ha` argument. The makes the evacuation planner behave the
same no matter whether HA is enabled or not. *)
val get_vms_which_prevent_evacuation_internal :
__context:Context.t
-> self:API.ref_host
-> ignore_ha:bool
-> (API.ref_VM * string list) list

val evacuate :
__context:Context.t -> host:API.ref_host -> network:API.ref_network -> unit

Expand Down
3 changes: 2 additions & 1 deletion ocaml/xapi/xapi_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3440,7 +3440,8 @@ let check_update_readiness ~__context ~self:_ ~requires_reboot =
if not alive then
[[Api_errors.host_offline; Ref.string_of host]]
else if requires_reboot then
Xapi_host.get_vms_which_prevent_evacuation ~__context ~self:host
Xapi_host.get_vms_which_prevent_evacuation_internal ~__context ~self:host
~ignore_ha:true
|> List.map (fun (_, error) -> error)
else
[[]]
Expand Down

0 comments on commit 8bb7f7c

Please sign in to comment.