Skip to content

Commit

Permalink
Clean up when deployment is being deleted
Browse files Browse the repository at this point in the history
Previously, when the controller was removed, it could leave a mess behind
in the form of ConfigurationPolicy objects with finalizers on them. That
would also result in the CRD being stuck.

Now, the controller will put a finalizer on its own deployment when at
least one ConfigurationPolicy has a finalizer. Also, while the
deployment is being deleted, the finalizers on ConfigurationPolicies
will be removed immediately, in the same way they would be when the CRD
was being deleted.

Note: in this implementation, related objects might *never* be pruned
when the controller is removed.

Since the controller deployment is in a different namespace than the
ConfigurationPolicies it watches, the controller's cache cannot be
limited to a single namespace the way it was before. Instead, this uses
additional cache selectors to limit watches on ConfigurationPolicies
and Deployments to the relevant namespaces.

Refs:
 - https://issues.redhat.com/browse/ACM-2923

Signed-off-by: Justin Kulikauskas <jkulikau@redhat.com>
  • Loading branch information
JustinKuli committed Jan 20, 2023
1 parent fbd1c67 commit 87e0c3b
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 62 deletions.
147 changes: 121 additions & 26 deletions controllers/configurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
gocmp "github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus"
templates "github.com/stolostron/go-template-utils/v3/pkg/templates"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
extensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
extensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
Expand Down Expand Up @@ -51,6 +52,7 @@ const (
ControllerName string = "configuration-policy-controller"
CRDName string = "configurationpolicies.policy.open-cluster-management.io"
complianceStatusConditionLimit = 10
pruneObjectFinalizer string = "policy.open-cluster-management.io/delete-related-objects"
)

var log = ctrl.Log.WithName(ControllerName)
Expand Down Expand Up @@ -188,6 +190,26 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint
skipLoop = true
}

needDeploymentFinalizer := false

for _, plc := range policiesList.Items {
if objHasFinalizer(&plc, pruneObjectFinalizer) {
needDeploymentFinalizer = true

break
}
}

if err := r.manageDeploymentFinalizer(needDeploymentFinalizer); err != nil {
if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) {
log.Info("Not managing the controller's deployment finalizer because it is running locally")
} else {
log.Error(err, "Failed to manage the controller's deployment finalizer, skipping loop")

skipLoop = true
}
}

// This is done every loop cycle since the channel needs to be variable in size to account for the number of
// policies changing.
policyQueue := make(chan *policyv1.ConfigurationPolicy, len(policiesList.Items))
Expand Down Expand Up @@ -565,6 +587,52 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(plc policyv1.Configu
return deletionFailures
}

// cleanupImmediately returns true when the cluster is in a state where configurationpolicies should
// be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This is the
// case when the CRD or the controller's deployment are already being deleted.
// cleanupImmediately returns true when the cluster is in a state where configurationpolicies should
// be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This is the
// case when the CRD or the controller's deployment are already being deleted.
func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) {
	// Either condition alone is sufficient, so a positive answer from one
	// check short-circuits even if the other check errored.
	deployDeleting, deployErr := r.deploymentIsDeleting()
	if deployErr == nil && deployDeleting {
		return true, nil
	}

	defDeleting, defErr := r.definitionIsDeleting()
	if defErr == nil && defDeleting {
		return true, nil
	}

	if deployErr == nil && defErr == nil {
		// if either was deleting, we would've already returned.
		return false, nil
	}

	// At least one had an unexpected error, so the decision can't be made right now
	//nolint:errorlint // we can't choose just one of the errors to "correctly" wrap
	return false, fmt.Errorf("deploymentIsDeleting error: '%v', definitionIsDeleting error: '%v'",
		deployErr, defErr)
}

// deploymentIsDeleting reports whether the controller's own Deployment has a
// deletion timestamp set. When the operator namespace cannot be determined
// (i.e. the controller is running locally), it reports false with no error.
func (r *ConfigurationPolicyReconciler) deploymentIsDeleting() (bool, error) {
	key, err := common.GetOperatorNamespacedName()

	switch {
	case err == nil:
		// Identified the deployment; fall through to fetch it.
	case errors.Is(err, common.ErrNoNamespace), errors.Is(err, common.ErrRunLocal):
		// Running locally: there is no deployment to inspect.
		return false, nil
	default:
		return false, err
	}

	var deployment appsv1.Deployment

	if getErr := r.Get(context.TODO(), key, &deployment); getErr != nil {
		return false, getErr
	}

	return deployment.DeletionTimestamp != nil, nil
}

func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) {
key := types.NamespacedName{Name: CRDName}
v1def := extensionsv1.CustomResourceDefinition{}
Expand Down Expand Up @@ -631,13 +699,31 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi
return
}

pruneObjectFinalizer := "policy.open-cluster-management.io/delete-related-objects"

// object handling for when configurationPolicy is deleted
if plc.Spec.PruneObjectBehavior == "DeleteIfCreated" || plc.Spec.PruneObjectBehavior == "DeleteAll" {
cleanupNow, err := r.cleanupImmediately()
if err != nil {
log.Error(err, "Error determining whether to cleanup immediately, requeueing policy")

return
}

if cleanupNow {
if objHasFinalizer(&plc, pruneObjectFinalizer) {
plc.SetFinalizers(removeObjFinalizer(&plc, pruneObjectFinalizer))

err := r.Update(context.TODO(), &plc)
if err != nil {
log.V(1).Error(err, "Error removing finalizer for configuration policy", plc)
}
}

return
}

// set finalizer if it hasn't been set
if !configPlcHasFinalizer(plc, pruneObjectFinalizer) {
plc.SetFinalizers(addConfigPlcFinalizer(plc, pruneObjectFinalizer))
if !objHasFinalizer(&plc, pruneObjectFinalizer) {
plc.SetFinalizers(addObjFinalizer(&plc, pruneObjectFinalizer))

err := r.Update(context.TODO(), &plc)
if err != nil {
Expand All @@ -647,32 +733,13 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi

// kick off object deletion if configurationPolicy has been deleted
if plc.ObjectMeta.DeletionTimestamp != nil {
// If the CRD is deleting, don't prune objects
crdDeleting, err := r.definitionIsDeleting()
if err != nil {
log.Error(err, "Error getting configurationpolicies CRD, requeueing policy")

return
} else if crdDeleting {
log.V(1).Info("The configuraionpolicy CRD is being deleted, ignoring and removing " +
"the delete-related-objects finalizer")

plc.SetFinalizers(removeConfigPlcFinalizer(plc, pruneObjectFinalizer))
err := r.Update(context.TODO(), &plc)
if err != nil {
log.V(1).Error(err, "Error unsetting finalizer for configuration policy", plc)
}

return
} // else: CRD is not being deleted

log.V(1).Info("Config policy has been deleted, handling child objects")

failures := r.cleanUpChildObjects(plc)

if len(failures) == 0 {
log.V(1).Info("Objects have been successfully cleaned up, removing finalizer")
plc.SetFinalizers(removeConfigPlcFinalizer(plc, pruneObjectFinalizer))
plc.SetFinalizers(removeObjFinalizer(&plc, pruneObjectFinalizer))

err := r.Update(context.TODO(), &plc)
if err != nil {
Expand Down Expand Up @@ -704,9 +771,9 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi

return
}
} else if configPlcHasFinalizer(plc, pruneObjectFinalizer) {
} else if objHasFinalizer(&plc, pruneObjectFinalizer) {
// if pruneObjectBehavior is none, no finalizer is needed
plc.SetFinalizers(removeConfigPlcFinalizer(plc, pruneObjectFinalizer))
plc.SetFinalizers(removeObjFinalizer(&plc, pruneObjectFinalizer))
err := r.Update(context.TODO(), &plc)
if err != nil {
log.V(1).Error(err, "Error unsetting finalizer for configuration policy", plc)
Expand Down Expand Up @@ -2672,3 +2739,31 @@ func convertPolicyStatusToString(plc *policyv1.ConfigurationPolicy) (results str

return result
}

// manageDeploymentFinalizer ensures the finalizer state on the controller's
// own Deployment matches shouldBeSet: the prune-object finalizer is added
// when shouldBeSet is true and removed when it is false. No API update is
// issued when the deployment is already in the desired state.
func (r *ConfigurationPolicyReconciler) manageDeploymentFinalizer(shouldBeSet bool) error {
	key, err := common.GetOperatorNamespacedName()
	if err != nil {
		return err
	}

	deployment := appsv1.Deployment{}
	if err := r.Client.Get(context.TODO(), key, &deployment); err != nil {
		return err
	}

	// Already in the desired state: nothing to do.
	if objHasFinalizer(&deployment, pruneObjectFinalizer) == shouldBeSet {
		return nil
	}

	if shouldBeSet {
		deployment.SetFinalizers(addObjFinalizer(&deployment, pruneObjectFinalizer))
	} else {
		deployment.SetFinalizers(removeObjFinalizer(&deployment, pruneObjectFinalizer))
	}

	return r.Update(context.TODO(), &deployment)
}
18 changes: 10 additions & 8 deletions controllers/configurationpolicy_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"

apiRes "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/json"
Expand Down Expand Up @@ -530,8 +531,8 @@ func sortAndJoinKeys(m map[string]bool, sep string) string {
return strings.Join(keys, sep)
}

func configPlcHasFinalizer(plc policyv1.ConfigurationPolicy, finalizer string) bool {
for _, existingFinalizer := range plc.GetFinalizers() {
func objHasFinalizer(obj metav1.Object, finalizer string) bool {
for _, existingFinalizer := range obj.GetFinalizers() {
if existingFinalizer == finalizer {
return true
}
Expand All @@ -540,18 +541,19 @@ func configPlcHasFinalizer(plc policyv1.ConfigurationPolicy, finalizer string) b
return false
}

func addConfigPlcFinalizer(plc policyv1.ConfigurationPolicy, finalizer string) []string {
if configPlcHasFinalizer(plc, finalizer) {
return plc.GetFinalizers()
func addObjFinalizer(obj metav1.Object, finalizer string) []string {
if objHasFinalizer(obj, finalizer) {
return obj.GetFinalizers()
}

return append(plc.GetFinalizers(), finalizer)
return append(obj.GetFinalizers(), finalizer)
}

func removeConfigPlcFinalizer(plc policyv1.ConfigurationPolicy, finalizer string) []string {
// nolint: unparam
func removeObjFinalizer(obj metav1.Object, finalizer string) []string {
result := []string{}

for _, existingFinalizer := range plc.GetFinalizers() {
for _, existingFinalizer := range obj.GetFinalizers() {
if existingFinalizer != finalizer {
result = append(result, existingFinalizer)
}
Expand Down
78 changes: 50 additions & 28 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ import (
"github.com/go-logr/zapr"
"github.com/spf13/pflag"
"github.com/stolostron/go-log-utils/zaputil"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"

// to ensure that exec-entrypoint and run can make use of them.
extensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
extensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
"k8s.io/apimachinery/pkg/fields"
Expand All @@ -26,6 +25,8 @@ import (
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"

// Import all k8s client auth plugins to ensure that exec-entrypoint and run can use them
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
Expand Down Expand Up @@ -147,14 +148,6 @@ func main() {

printVersion()

namespace, err := common.GetWatchNamespace()
if err != nil {
log.Error(err, "Failed to get watch namespace")
os.Exit(1)
}

log.V(2).Info("Configured the watch namespace", "namespace", namespace)

// Get a config to talk to the apiserver
cfg, err := config.GetConfig()
if err != nil {
Expand All @@ -163,29 +156,63 @@ func main() {
}

// Set a field selector so that a watch on CRDs will be limited to just the configuration policy CRD.
newCacheFunc := cache.BuilderWithOptions(
cache.Options{
SelectorsByObject: cache.SelectorsByObject{
&extensionsv1.CustomResourceDefinition{}: {
Field: fields.SelectorFromSet(fields.Set{"metadata.name": controllers.CRDName}),
},
&extensionsv1beta1.CustomResourceDefinition{}: {
Field: fields.SelectorFromSet(fields.Set{"metadata.name": controllers.CRDName}),
},
},
cacheSelectors := cache.SelectorsByObject{
&extensionsv1.CustomResourceDefinition{}: {
Field: fields.SelectorFromSet(fields.Set{"metadata.name": controllers.CRDName}),
},
)
&extensionsv1beta1.CustomResourceDefinition{}: {
Field: fields.SelectorFromSet(fields.Set{"metadata.name": controllers.CRDName}),
},
}

ctrlKey, err := common.GetOperatorNamespacedName()
if err != nil {
if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) {
log.Info("Running locally, skipping restrictions on the Deployment cache")
} else {
log.Error(err, "Failed to identify the controller's deployment")
os.Exit(1)
}
} else {
cacheSelectors[&appsv1.Deployment{}] = cache.ObjectSelector{
Field: fields.SelectorFromSet(fields.Set{
"metadata.namespace": ctrlKey.Namespace,
"metadata.name": ctrlKey.Name,
}),
}
}

watchNamespace, err := common.GetWatchNamespace()
if strings.Contains(watchNamespace, ",") {
err = fmt.Errorf("multiple watched namespaces are not allowed for this controller")
}

if err != nil {
log.Error(err, "Failed to get watch namespace")
os.Exit(1)
}

log.V(2).Info("Configured the watch namespace", "watchNamespace", watchNamespace)

if watchNamespace != "" {
cacheSelectors[&policyv1.ConfigurationPolicy{}] = cache.ObjectSelector{
Field: fields.SelectorFromSet(fields.Set{
"metadata.namespace": watchNamespace,
}),
}
} else {
log.Info("Skipping restrictions on the ConfigurationPolicy cache because watchNamespace is empty")
}

// Set default manager options
options := manager.Options{
Namespace: namespace,
MetricsBindAddress: metricsAddr,
Scheme: scheme,
Port: 9443,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "config-policy-controller.open-cluster-management.io",
NewCache: newCacheFunc,
NewCache: cache.BuilderWithOptions(cache.Options{SelectorsByObject: cacheSelectors}),
// Disable the cache for Secrets to avoid a watch getting created when the `policy-encryption-key`
// Secret is retrieved. Special cache handling is done by the controller.
ClientDisableCacheFor: []client.Object{&corev1.Secret{}},
Expand Down Expand Up @@ -217,11 +244,6 @@ func main() {
),
}

if strings.Contains(namespace, ",") {
options.Namespace = ""
options.NewCache = cache.MultiNamespacedCacheBuilder(strings.Split(namespace, ","))
}

if legacyLeaderElection {
// If legacyLeaderElection is enabled, then that means the lease API is not available.
// In this case, use the legacy leader election method of a ConfigMap.
Expand Down
Loading

0 comments on commit 87e0c3b

Please sign in to comment.