From f1d4d7eba9ca646464d2d44d7f68541e0c02cb0e Mon Sep 17 00:00:00 2001
From: Rob Hoes <rob.hoes@citrix.com>
Date: Tue, 17 May 2022 15:10:32 +0000
Subject: [PATCH] CA-366309: ignore HA when checking update readiness

In particular, the evacuation plan is computed in a different way when
HA is enabled. We want the update readiness to check things as if HA is
disabled, because before updates are applied, HA will be disabled anyway.

Signed-off-by: Rob Hoes <rob.hoes@citrix.com>
---
 ocaml/xapi/xapi_host.ml  | 25 +++++++++++++++----------
 ocaml/xapi/xapi_host.mli |  9 +++++++++
 ocaml/xapi/xapi_pool.ml  |  3 ++-
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml
index e5fb8889fbb..30ffc84050e 100644
--- a/ocaml/xapi/xapi_host.ml
+++ b/ocaml/xapi/xapi_host.ml
@@ -257,7 +257,7 @@ let string_of_per_vm_plan p =
 
 (** Return a table mapping VMs to 'per_vm_plan' types indicating either a target
     	Host or a reason why the VM cannot be migrated. *)
-let compute_evacuation_plan_no_wlb ~__context ~host =
+let compute_evacuation_plan_no_wlb ~__context ~host ?(ignore_ha = false) () =
   let all_hosts = Db.Host.get_all ~__context in
   let enabled_hosts =
     List.filter (fun self -> Db.Host.get_enabled ~__context ~self) all_hosts
@@ -308,7 +308,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
        			   planner's PoV) to the result obtained following a host failure and VM restart. *)
     let pool = Helpers.get_pool ~__context in
     let protected_vms, unprotected_vms =
-      if Db.Pool.get_ha_enabled ~__context ~self:pool then
+      if Db.Pool.get_ha_enabled ~__context ~self:pool && not ignore_ha then
         List.partition
           (fun (_, record) ->
             Helpers.vm_should_always_run record.API.vM_ha_always_run
@@ -393,7 +393,7 @@ let compute_evacuation_plan_no_wlb ~__context ~host =
 (* Old Miami style function with the strange error encoding *)
 let assert_can_evacuate ~__context ~host =
   (* call no_wlb function as we only care about errors, and wlb only provides recs for moveable vms *)
-  let plans = compute_evacuation_plan_no_wlb ~__context ~host in
+  let plans = compute_evacuation_plan_no_wlb ~__context ~host () in
   let errors =
     Hashtbl.fold
       (fun _ plan acc ->
@@ -411,9 +411,10 @@ let assert_can_evacuate ~__context ~host =
          (Api_errors.cannot_evacuate_host, [String.concat "|" errors])
       )
 
-(* New Orlando style function which returns a Map *)
-let get_vms_which_prevent_evacuation ~__context ~self =
-  let plans = compute_evacuation_plan_no_wlb ~__context ~host:self in
+let get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha =
+  let plans =
+    compute_evacuation_plan_no_wlb ~__context ~host:self ~ignore_ha ()
+  in
   Hashtbl.fold
     (fun vm plan acc ->
       match plan with
@@ -424,6 +425,10 @@ let get_vms_which_prevent_evacuation ~__context ~self =
     )
     plans []
 
+(* New Orlando style function which returns a Map *)
+let get_vms_which_prevent_evacuation ~__context ~self =
+  get_vms_which_prevent_evacuation_internal ~__context ~self ~ignore_ha:false
+
 let compute_evacuation_plan_wlb ~__context ~self =
   (* We treat xapi as primary when it comes to "hard" errors, i.e. those that aren't down to memory constraints.  These are things like
      	 VM_REQUIRES_SR or VM_LACKS_FEATURE_SUSPEND.
@@ -438,7 +443,7 @@ let compute_evacuation_plan_wlb ~__context ~self =
      	   right thing to do because WLB doesn't know about all the HA corner cases (for example), but xapi does.
      	   If there are any VMs left over, record them as HOST_NOT_ENOUGH_FREE_MEMORY, because we assume that WLB thinks they don't fit.
   *)
-  let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self in
+  let error_vms = compute_evacuation_plan_no_wlb ~__context ~host:self () in
   let vm_recoms =
     get_evacuation_recoms ~__context ~uuid:(Db.Host.get_uuid ~__context ~self)
   in
@@ -538,7 +543,7 @@ let compute_evacuation_plan ~__context ~host =
     debug
       "Using wlb recommendations for choosing a host has been disabled or wlb \
        is not available. Using original algorithm" ;
-    compute_evacuation_plan_no_wlb ~__context ~host
+    compute_evacuation_plan_no_wlb ~__context ~host ()
   ) else
     try
       debug "Using WLB recommendations for host evacuation." ;
@@ -565,12 +570,12 @@ let compute_evacuation_plan ~__context ~host =
               )
           with _ -> ()
         ) ;
-        compute_evacuation_plan_no_wlb ~__context ~host
+        compute_evacuation_plan_no_wlb ~__context ~host ()
     | _ ->
         debug
           "Encountered an unknown error when using wlb for choosing host. \
            Using original algorithm" ;
-        compute_evacuation_plan_no_wlb ~__context ~host
+        compute_evacuation_plan_no_wlb ~__context ~host ()
 
 let evacuate ~__context ~host ~network =
   let task = Context.get_task_id __context in
diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli
index f36b9a6f10f..89c85345170 100644
--- a/ocaml/xapi/xapi_host.mli
+++ b/ocaml/xapi/xapi_host.mli
@@ -54,6 +54,15 @@ val assert_can_evacuate : __context:Context.t -> host:API.ref_host -> unit
 val get_vms_which_prevent_evacuation :
   __context:Context.t -> self:API.ref_host -> (API.ref_VM * string list) list
 
+(* Similar to the (API) call `host.get_vms_which_prevent_evacuation`, but with an
+   additional `ignore_ha` argument. The makes the evacuation planner behave the
+   same no matter whether HA is enabled or not. *)
+val get_vms_which_prevent_evacuation_internal :
+     __context:Context.t
+  -> self:API.ref_host
+  -> ignore_ha:bool
+  -> (API.ref_VM * string list) list
+
 val evacuate :
   __context:Context.t -> host:API.ref_host -> network:API.ref_network -> unit
 
diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml
index fe0ee60f5db..46c9a97ef51 100644
--- a/ocaml/xapi/xapi_pool.ml
+++ b/ocaml/xapi/xapi_pool.ml
@@ -3440,7 +3440,8 @@ let check_update_readiness ~__context ~self:_ ~requires_reboot =
     if not alive then
       [[Api_errors.host_offline; Ref.string_of host]]
     else if requires_reboot then
-      Xapi_host.get_vms_which_prevent_evacuation ~__context ~self:host
+      Xapi_host.get_vms_which_prevent_evacuation_internal ~__context ~self:host
+        ~ignore_ha:true
       |> List.map (fun (_, error) -> error)
     else
       [[]]