diff --git a/build/common/config/.golangci.yml b/build/common/config/.golangci.yml index a2e1e9b9..4dba101a 100644 --- a/build/common/config/.golangci.yml +++ b/build/common/config/.golangci.yml @@ -29,6 +29,9 @@ linters: enable-all: true disable: - bodyclose + # Disable contextcheck since we don't want to pass the context passed to PeriodicallyExecConfigPolicies to functions + # called within it or else cleanup won't occur + - contextcheck - cyclop - depguard - dupl diff --git a/controllers/configurationpolicy_controller.go b/controllers/configurationpolicy_controller.go index f2873970..2a60baae 100644 --- a/controllers/configurationpolicy_controller.go +++ b/controllers/configurationpolicy_controller.go @@ -151,15 +151,32 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c // PeriodicallyExecConfigPolicies loops through all configurationpolicies in the target namespace and triggers // template handling for each one. This function drives all the work the configuration policy controller does. -func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint, elected <-chan struct{}, test bool) { +func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies( + ctx context.Context, freq uint, elected <-chan struct{}, +) { log.Info("Waiting for leader election before periodically evaluating configuration policies") - <-elected + + select { + case <-elected: + case <-ctx.Done(): + return + } const waiting = 10 * time.Minute + var exiting bool + // Loop twice after exit condition is received to account for race conditions and retries. + loopsAfterExit := 2 - for { + for !exiting || (exiting && loopsAfterExit > 0) { start := time.Now() + select { + case <-ctx.Done(): + exiting = true + loopsAfterExit-- + default: + } + policiesList := policyv1.ConfigurationPolicyList{} var skipLoop bool @@ -211,6 +228,13 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint } } + cleanupImmediately, err := r.cleanupImmediately() + if err != nil { + log.Error(err, "Failed to determine if it's time to cleanup immediately") + + skipLoop = true + } + // This is done every loop cycle since the channel needs to be variable in size to account for the number of // policies changing. policyQueue := make(chan *policyv1.ConfigurationPolicy, len(policiesList.Items)) @@ -230,7 +254,7 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint for i := range policiesList.Items { policy := policiesList.Items[i] - if !shouldEvaluatePolicy(&policy) { + if !shouldEvaluatePolicy(&policy, cleanupImmediately) { continue } @@ -257,10 +281,6 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint log.V(2).Info("Sleeping before reprocessing the configuration policies", "seconds", sleepTime) time.Sleep(sleepTime) } - - if test { - return - } } } @@ -317,10 +337,18 @@ func (r *ConfigurationPolicyReconciler) refreshDiscoveryInfo() error { // shouldEvaluatePolicy will determine if the policy is ready for evaluation by examining the // status.lastEvaluated and status.lastEvaluatedGeneration fields. If a policy has been updated, it // will always be triggered for evaluation. If the spec.evaluationInterval configuration has been -// met, then that will also trigger an evaluation. -func shouldEvaluatePolicy(policy *policyv1.ConfigurationPolicy) bool { +// met, then that will also trigger an evaluation. If cleanupImmediately is true, then only policies +// with finalizers will be ready for evaluation regardless of the last evaluation. +// cleanupImmediately should be set true when the controller is getting uninstalled. +func shouldEvaluatePolicy(policy *policyv1.ConfigurationPolicy, cleanupImmediately bool) bool { log := log.WithValues("policy", policy.GetName()) + // If it's time to clean up such as when the config-policy-controller is being uninstalled, only evaluate policies + // with a finalizer to remove the finalizer. + if cleanupImmediately { + return len(policy.Finalizers) != 0 + } + if policy.ObjectMeta.DeletionTimestamp != nil { log.V(2).Info("The policy has been deleted and is waiting for object cleanup. Will evaluate it now.") diff --git a/controllers/configurationpolicy_controller_test.go b/controllers/configurationpolicy_controller_test.go index 202c5a34..d68f9dd4 100644 --- a/controllers/configurationpolicy_controller_test.go +++ b/controllers/configurationpolicy_controller_test.go @@ -550,6 +550,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { complianceState policyv1.ComplianceState expected bool deletionTimestamp *metav1.Time + cleanupImmediately bool + finalizers []string }{ { "Just evaluated and the generation is unchanged", @@ -559,6 +561,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, false, nil, + false, + []string{}, }, { "The generation has changed", @@ -568,6 +572,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, true, nil, + false, + []string{}, }, { "lastEvaluated not set", @@ -577,6 +583,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, true, nil, + false, + []string{}, }, { "Invalid lastEvaluated", @@ -586,6 +594,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, true, nil, + false, + []string{}, }, { "Unknown compliance state", @@ -595,6 +605,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.UnknownCompliancy, true, nil, + false, + []string{}, }, { "Default evaluation interval with a past lastEvaluated when compliant", @@ -604,6 +616,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, true, nil, + false, + []string{}, }, { "Default evaluation interval with a past lastEvaluated when noncompliant", @@ -613,6 +627,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.NonCompliant, true, nil, + false, + []string{}, }, { "Never evaluation interval with past lastEvaluated when compliant", @@ -622,6 +638,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, false, nil, + false, + []string{}, }, { "Never evaluation interval with past lastEvaluated when noncompliant", @@ -631,6 +649,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.NonCompliant, false, nil, + false, + []string{}, }, { "Invalid evaluation interval when compliant", @@ -640,6 +660,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, true, nil, + false, + []string{}, }, { "Invalid evaluation interval when noncompliant", @@ -649,6 +671,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.NonCompliant, true, nil, + false, + []string{}, }, { "Custom evaluation interval that hasn't past yet when compliant", @@ -658,6 +682,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.Compliant, false, nil, + false, + []string{}, }, { "Custom evaluation interval that hasn't past yet when noncompliant", @@ -667,6 +693,8 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.NonCompliant, false, nil, + false, + []string{}, }, { "Deletion timestamp is non nil", @@ -676,6 +704,30 @@ func TestShouldEvaluatePolicy(t *testing.T) { policyv1.NonCompliant, true, &metav1.Time{Time: time.Now()}, + false, + []string{}, + }, + { + "Finalizer and the controller is being deleted", + time.Now().UTC().Add(-13 * time.Hour).Format(time.RFC3339), + 2, + policyv1.EvaluationInterval{NonCompliant: "12h"}, + policyv1.NonCompliant, + true, + &metav1.Time{Time: time.Now()}, + true, + []string{pruneObjectFinalizer}, + }, + { + "No finalizer and the controller is being deleted", + time.Now().UTC().Add(-13 * time.Hour).Format(time.RFC3339), + 2, + policyv1.EvaluationInterval{NonCompliant: "12h"}, + policyv1.NonCompliant, + false, + &metav1.Time{Time: time.Now()}, + true, + []string{}, }, } @@ -693,8 +745,9 @@ func TestShouldEvaluatePolicy(t *testing.T) { policy.Spec.EvaluationInterval = test.evaluationInterval policy.Status.ComplianceState = test.complianceState policy.ObjectMeta.DeletionTimestamp = test.deletionTimestamp + policy.ObjectMeta.Finalizers = test.finalizers - if actual := shouldEvaluatePolicy(policy); actual != test.expected { + if actual := shouldEvaluatePolicy(policy, test.cleanupImmediately); actual != test.expected { t.Fatalf("expected %v but got %v", test.expected, actual) } }, diff --git a/main.go b/main.go index c57f04ec..c9e25bd8 100644 --- a/main.go +++ b/main.go @@ -320,10 +320,16 @@ func main() { os.Exit(1) } + terminatingCtx := ctrl.SetupSignalHandler() + managerCtx, managerCancel := context.WithCancel(context.Background()) + // PeriodicallyExecConfigPolicies is the go-routine that periodically checks the policies log.V(1).Info("Perodically processing Configuration Policies", "frequency", frequency) - go reconciler.PeriodicallyExecConfigPolicies(frequency, mgr.Elected(), false) + go func() { + reconciler.PeriodicallyExecConfigPolicies(terminatingCtx, frequency, mgr.Elected()) + managerCancel() + }() // This lease is not related to leader election. This is to report the status of the controller // to the addon framework. This can be seen in the "status" section of the ManagedClusterAddOn @@ -361,7 +367,7 @@ func main() { log.Info("Starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err := mgr.Start(managerCtx); err != nil { log.Error(err, "Problem running manager") os.Exit(1) }