From d4262277b3ee79761c46eafd991b3286c68e25eb Mon Sep 17 00:00:00 2001
From: Justin Kulikauskas
Date: Tue, 13 Dec 2022 11:25:24 -0500
Subject: [PATCH] Skip pruning when the CRD is being deleted

In some upgrade cases, the ConfigurationPolicy CRD might be temporarily
removed. This usually causes all of the ConfigurationPolicies to be
deleted, which would trigger the pruning behavior. But deleting those
resources could cause problems, even though they might immediately be
recreated when the upgrade finishes.

Even outside of upgrades, deleting the CRD "by accident" should probably
not trigger the prune behavior. Now, before pruning anything, the
controller checks if the CRD is being deleted.

BUG: `DeleteIfCreated` will not work perfectly through an upgrade: if the
object was created by the policy *before* the CRD was removed, it won't
be pruned by the policy after everything is re-created. But missing a
deletion seems like a better behavior than deleting extra things.

Refs:
 - https://issues.redhat.com/browse/ACM-2355

Signed-off-by: Justin Kulikauskas
---
 controllers/configurationpolicy_controller.go | 48 +++++++++++++
 main.go                                       |  8 ++-
 test/e2e/case20_delete_objects_test.go        | 72 +++++++++++++++++++
 .../case20_musthave_pod_deleteall.yaml        | 23 ++++++
 4 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 test/resources/case20_delete_objects/case20_musthave_pod_deleteall.yaml

diff --git a/controllers/configurationpolicy_controller.go b/controllers/configurationpolicy_controller.go
index cdcd9a6d..fd9cbd6b 100644
--- a/controllers/configurationpolicy_controller.go
+++ b/controllers/configurationpolicy_controller.go
@@ -19,12 +19,15 @@ import (
     "github.com/prometheus/client_golang/prometheus"
     templates "github.com/stolostron/go-template-utils/v3/pkg/templates"
     corev1 "k8s.io/api/core/v1"
+    extensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+    extensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
     k8serrors "k8s.io/apimachinery/pkg/api/errors"
     meta "k8s.io/apimachinery/pkg/api/meta"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
     "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/apimachinery/pkg/runtime/schema"
+    "k8s.io/apimachinery/pkg/types"
     apimachineryerrors "k8s.io/apimachinery/pkg/util/errors"
     "k8s.io/apimachinery/pkg/util/json"
     "k8s.io/client-go/dynamic"
@@ -557,6 +560,32 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(plc policyv1.Configu
     return deletionFailures
 }
 
+func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) {
+    key := types.NamespacedName{Name: "configurationpolicies.policy.open-cluster-management.io"}
+    v1def := extensionsv1.CustomResourceDefinition{}
+
+    v1err := r.Get(context.TODO(), key, &v1def)
+    if v1err == nil {
+        return (v1def.ObjectMeta.DeletionTimestamp != nil), nil
+    }
+
+    v1beta1def := extensionsv1beta1.CustomResourceDefinition{}
+
+    v1beta1err := r.Get(context.TODO(), key, &v1beta1def)
+    if v1beta1err == nil {
+        return (v1beta1def.DeletionTimestamp != nil), nil
+    }
+
+    // It might not be possible to get a not-found on the CRD while reconciling the CR...
+    // But in that case, it seems reasonable to still consider it "deleting"
+    if k8serrors.IsNotFound(v1err) || k8serrors.IsNotFound(v1beta1err) {
+        return true, nil
+    }
+
+    // Both had unexpected errors, return them and retry later
+    return false, fmt.Errorf("v1: %v, v1beta1: %v", v1err, v1beta1err) //nolint:errorlint
+}
+
 // handleObjectTemplates iterates through all policy templates in a given policy and processes them
 func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.ConfigurationPolicy) {
     log := log.WithValues("policy", plc.GetName())
@@ -613,6 +642,25 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi
 
     // kick off object deletion if configurationPolicy has been deleted
     if plc.ObjectMeta.DeletionTimestamp != nil {
+        // If the CRD is deleting, don't prune objects
+        crdDeleting, err := r.definitionIsDeleting()
+        if err != nil {
+            log.Error(err, "Error getting configurationpolicies CRD, requeueing policy")
+
+            return
+        } else if crdDeleting {
+            log.V(1).Info("The configurationpolicy CRD is being deleted, ignoring and removing " +
+                "the delete-related-objects finalizer")
+
+            plc.SetFinalizers(removeConfigPlcFinalizer(plc, pruneObjectFinalizer))
+            err := r.Update(context.TODO(), &plc)
+            if err != nil {
+                log.V(1).Error(err, "Error unsetting finalizer for configuration policy")
+            }
+
+            return
+        } // else: CRD is not being deleted
+
         log.V(1).Info("Config policy has been deleted, handling child objects")
 
         failures := r.cleanUpChildObjects(plc)
diff --git a/main.go b/main.go
index 132f595f..ada205f7 100644
--- a/main.go
+++ b/main.go
@@ -16,12 +16,14 @@ import (
     "github.com/spf13/pflag"
     "github.com/stolostron/go-log-utils/zaputil"
     corev1 "k8s.io/api/core/v1"
+
+    // to ensure that exec-entrypoint and run can make use of them.
+    extensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+    extensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
     k8sruntime "k8s.io/apimachinery/pkg/runtime"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
     "k8s.io/client-go/kubernetes"
     clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-
-    // to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -56,6 +58,8 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme utilruntime.Must(policyv1.AddToScheme(scheme)) + utilruntime.Must(extensionsv1.AddToScheme(scheme)) + utilruntime.Must(extensionsv1beta1.AddToScheme(scheme)) } func main() { diff --git a/test/e2e/case20_delete_objects_test.go b/test/e2e/case20_delete_objects_test.go index 686ab161..271a9240 100644 --- a/test/e2e/case20_delete_objects_test.go +++ b/test/e2e/case20_delete_objects_test.go @@ -24,6 +24,8 @@ const ( case20ConfigPolicyNameInform string = "policy-pod-inform" case20ConfigPolicyNameFinalizer string = "policy-pod-create-withfinalizer" case20ConfigPolicyNameChange string = "policy-pod-change-remediation" + case20ConfigPolicyNameMHPDA string = "policy-pod-mhpda" + case20PodMHPDAName string = "nginx-pod-e2e20-mhpda" case20PodYaml string = "../resources/case20_delete_objects/case20_pod.yaml" case20PolicyYamlCreate string = "../resources/case20_delete_objects/case20_create_pod.yaml" case20PolicyYamlEdit string = "../resources/case20_delete_objects/case20_edit_pod.yaml" @@ -32,6 +34,10 @@ const ( case20PolicyYamlFinalizer string = "../resources/case20_delete_objects/case20_createpod_finalizer.yaml" case20PolicyYamlChangeInform string = "../resources/case20_delete_objects/case20_change_inform.yaml" case20PolicyYamlChangeEnforce string = "../resources/case20_delete_objects/case20_change_enforce.yaml" + case20PolicyYamlMHPDA string = "../resources/case20_delete_objects/case20_musthave_pod_deleteall.yaml" + + // For the CRD deletion test + case20ConfigPolicyCRDPath string = "../../deploy/crds/policy.open-cluster-management.io_configurationpolicies.yaml" ) var _ = Describe("Test status fields being set for object deletion", func() { @@ -532,3 +538,69 @@ var _ = Describe("Test objects that should be deleted are actually being deleted }) }) }) + +var _ = Describe("Test objects are not deleted when the CRD is removed", Ordered, func() { + AfterAll(func() { + deleteConfigPolicies([]string{case20ConfigPolicyNameMHPDA}) + utils.Kubectl("apply", "-f", case20ConfigPolicyCRDPath) + }) + + It("creates the policy to manage a pod", func() { + By("Creating " + case20ConfigPolicyNameMHPDA + " on managed") + utils.Kubectl("apply", "-f", case20PolicyYamlMHPDA, "-n", testNamespace) + plc := utils.GetWithTimeout(clientManagedDynamic, gvrConfigPolicy, + case20ConfigPolicyNameMHPDA, testNamespace, true, defaultTimeoutSeconds) + Expect(plc).NotTo(BeNil()) + Eventually(func() interface{} { + managedPlc := utils.GetWithTimeout(clientManagedDynamic, gvrConfigPolicy, + case20ConfigPolicyNameMHPDA, testNamespace, true, defaultTimeoutSeconds) + + return utils.GetComplianceState(managedPlc) + }, defaultTimeoutSeconds, 1).Should(Equal("Compliant")) + }) + + It("deletes the ConfigurationPolicy CRD and compares the pod UID before and after", func() { + By("Getting the pod UID") + oldPodUID := "" + Eventually(func() interface{} { + pod := utils.GetWithTimeout(clientManagedDynamic, gvrPod, + case20PodMHPDAName, "default", true, defaultTimeoutSeconds) + + oldPodUID = string(pod.GetUID()) + + return pod + }, defaultTimeoutSeconds, 1).Should(Not(BeNil())) + + By("Deleting the ConfigurationPolicy CRD") + utils.Kubectl("delete", "-f", case20ConfigPolicyCRDPath) + + By("Checking that the ConfigurationPolicy is gone") + namespace := clientManagedDynamic.Resource(gvrConfigPolicy).Namespace(testNamespace) + _, err := 
+        Expect(err).NotTo(BeNil())
+        Expect(err.Error()).To(ContainSubstring("the server could not find the requested resource"))
+
+        By("Recreating the CRD")
+        utils.Kubectl("apply", "-f", case20ConfigPolicyCRDPath)
+
+        By("Recreating the ConfigurationPolicy")
+        utils.Kubectl("apply", "-f", case20PolicyYamlMHPDA, "-n", testNamespace)
+        plc := utils.GetWithTimeout(clientManagedDynamic, gvrConfigPolicy,
+            case20ConfigPolicyNameMHPDA, testNamespace, true, defaultTimeoutSeconds)
+        Expect(plc).NotTo(BeNil())
+        Eventually(func() interface{} {
+            managedPlc := utils.GetWithTimeout(clientManagedDynamic, gvrConfigPolicy,
+                case20ConfigPolicyNameMHPDA, testNamespace, true, defaultTimeoutSeconds)
+
+            return utils.GetComplianceState(managedPlc)
+        }, defaultTimeoutSeconds, 1).Should(Equal("Compliant"))
+
+        By("Checking the pod UID")
+        Eventually(func() interface{} {
+            pod := utils.GetWithTimeout(clientManagedDynamic, gvrPod,
+                case20PodMHPDAName, "default", true, defaultTimeoutSeconds)
+
+            return string(pod.GetUID())
+        }, defaultTimeoutSeconds, 1).Should(Equal(oldPodUID))
+    })
+})
diff --git a/test/resources/case20_delete_objects/case20_musthave_pod_deleteall.yaml b/test/resources/case20_delete_objects/case20_musthave_pod_deleteall.yaml
new file mode 100644
index 00000000..8a0fb38a
--- /dev/null
+++ b/test/resources/case20_delete_objects/case20_musthave_pod_deleteall.yaml
@@ -0,0 +1,23 @@
+apiVersion: policy.open-cluster-management.io/v1
+kind: ConfigurationPolicy
+metadata:
+  name: policy-pod-mhpda
+spec:
+  remediationAction: enforce
+  namespaceSelector:
+    exclude: ["kube-*"]
+    include: ["default"]
+  pruneObjectBehavior: DeleteAll
+  object-templates:
+    - complianceType: musthave
+      objectDefinition:
+        apiVersion: v1
+        kind: Pod
+        metadata:
+          name: nginx-pod-e2e20-mhpda
+        spec:
+          containers:
+            - image: nginx:1.7.9
+              name: nginx
+              ports:
+                - containerPort: 80
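
For illustration only, and not part of the patch above: the following is a minimal standalone sketch of the same "skip pruning while the CRD is terminating" check, using the apiextensions v1 clientset instead of the controller's cached client. The helper name crdIsDeleting and the kubeconfig handling are assumptions for the sketch; unlike the controller change, it does not fall back to the v1beta1 CRD API.

package main

import (
    "context"
    "fmt"
    "os"

    apiextclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
    k8serrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/tools/clientcmd"
)

// crdIsDeleting (hypothetical helper, not from the patch) reports whether the
// named CRD is absent or carries a deletion timestamp, mirroring the patch's
// choice to treat "not found" the same as "being deleted".
func crdIsDeleting(ctx context.Context, client apiextclientset.Interface, name string) (bool, error) {
    crd, err := client.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, name, metav1.GetOptions{})
    if err != nil {
        if k8serrors.IsNotFound(err) {
            // The CRD is already gone, so pruning should be skipped.
            return true, nil
        }

        // Unexpected error: return it so the caller can retry later.
        return false, err
    }

    return crd.GetDeletionTimestamp() != nil, nil
}

func main() {
    // Build a client from the local kubeconfig (assumes KUBECONFIG is set).
    cfg, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
    if err != nil {
        panic(err)
    }

    client, err := apiextclientset.NewForConfig(cfg)
    if err != nil {
        panic(err)
    }

    deleting, err := crdIsDeleting(context.TODO(), client,
        "configurationpolicies.policy.open-cluster-management.io")
    if err != nil {
        panic(err)
    }

    fmt.Println("skip pruning:", deleting)
}

In the controller itself, the check also queries the v1beta1 CRD API so that it keeps working on older clusters where the v1 API is unavailable.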