From 7db6d284585c9f1402bb17ab8e9ff5bf6a523751 Mon Sep 17 00:00:00 2001 From: mprahl Date: Thu, 28 Mar 2024 13:18:51 -0400 Subject: [PATCH] Handle policy recreation race condition When the configuration policy is recreated after the List call completes in the loop in the PeriodicallyExecConfigPolicies method, the old configuration policy status gets set on the new configuration policy which can lead to a compliance event not being set on the replicated parent policy. Relates: https://issues.redhat.com/browse/ACM-10500 Co-authored-by: Justin Kulikauskas Signed-off-by: mprahl --- controllers/configurationpolicy_controller.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/controllers/configurationpolicy_controller.go b/controllers/configurationpolicy_controller.go index 995d99c3..baed5fb2 100644 --- a/controllers/configurationpolicy_controller.go +++ b/controllers/configurationpolicy_controller.go @@ -2982,6 +2982,7 @@ func (r *ConfigurationPolicyReconciler) updatePolicyStatus( "Updating configurationPolicy status", "status", policy.Status.ComplianceState, "policy", policy.GetName(), ) + evaluatedUID := policy.UID updatedStatus := policy.Status maxRetries := 3 @@ -2991,6 +2992,21 @@ func (r *ConfigurationPolicyReconciler) updatePolicyStatus( log.Info(fmt.Sprintf("Failed to refresh policy; using previously fetched version: %s", err)) } else { policy.Status = updatedStatus + + // If the policy was recreated after it was retrieved from the cache, there could be no compliance + // set on the parent policy. Updating the status here may prevent a compliance event from being generated on + // future evaluations so skipping the status update is very important. If sendEvent was true, there is still + // value to send the compliance event to the parent policy so that this history is not lost before the + // policy was recreated. + if evaluatedUID != policy.UID { + log.Info("The ConfigurationPolicy was recreated after it was evaluated. Skipping the status update.") + + // Reset the original UID so that if there are more status updates (i.e. batches), the status on the + // API server is never updated. + policy.UID = evaluatedUID + + break + } } err = r.Status().Update(context.TODO(), policy)