From 821f97135e77d12fc9f2705153a48a7317a57302 Mon Sep 17 00:00:00 2001
From: mprahl
Date: Tue, 7 Feb 2023 17:32:57 -0500
Subject: [PATCH] Trigger uninstalls through a new annotation

Prior to this change, a finalizer was added to the Deployment so that
deleting the Deployment would trigger immediate clean up. That approach
doesn't handle the common case where the config-policy-controller
ManagedClusterAddOn is deleted, which causes the ManifestWork to be
deleted, which in turn deletes all of the Configuration Policy
controller deployment artifacts, including the service account.

A new approach is taken: the annotation
policy.open-cluster-management.io/uninstalling=true is set on the
Deployment to indicate that the Configuration Policy controller is
being uninstalled and should remove all finalizers. The Policy Addon
controller will be updated so that when the config-policy-controller
ManagedClusterAddOn object is deleted, a finalizer blocks the deletion
and a Pod runs on the managed cluster with the new `trigger-uninstall`
subcommand. The subcommand sets the uninstalling annotation on the
Deployment and then waits until all ConfigurationPolicy finalizers have
been removed. Once the command ends, the Pod exits, the
ManagedClusterAddOn object's finalizer is removed, and the uninstall
proceeds.

Relates:
https://issues.redhat.com/browse/ACM-3233
https://issues.redhat.com/browse/ACM-2923

Signed-off-by: mprahl
---
 .github/workflows/kind.yml                    |  15 ++-
 Makefile                                      |   8 +-
 build/Dockerfile                              |   2 +-
 controllers/configurationpolicy_controller.go | 104 ++++-----------
 controllers/configurationpolicy_utils.go      |   1 -
 deploy/manager/manager.yaml                   |   1 +
 deploy/operator.yaml                          |   1 +
 main.go                                       | 105 ++++++++++++---
 main_test.go                                  |   5 +-
 pkg/common/common.go                          |   2 +
 pkg/triggeruninstall/triggeruninstall.go      | 112 ++++++++++++++++
 test/e2e/case29_trigger_uninstall_test.go     | 121 ++++++++++++++++++
 .../case29_trigger_uninstall/policy.yaml      |  17 +++
 .../case29_trigger_uninstall/policy2.yaml     |  17 +++
 14 files changed, 407 insertions(+), 104 deletions(-)
 create mode 100644 pkg/triggeruninstall/triggeruninstall.go
 create mode 100644 test/e2e/case29_trigger_uninstall_test.go
 create mode 100644 test/resources/case29_trigger_uninstall/policy.yaml
 create mode 100644 test/resources/case29_trigger_uninstall/policy2.yaml

diff --git a/.github/workflows/kind.yml b/.github/workflows/kind.yml
index 7ad4e0de..cc7b17db 100644
--- a/.github/workflows/kind.yml
+++ b/.github/workflows/kind.yml
@@ -87,17 +87,22 @@ jobs:
           export GOPATH=$(go env GOPATH)
           KUBECONFIG=${PWD}/kubeconfig_managed make e2e-test-hosted-mode-coverage
 
+      - name: Verify Deployment Configuration
+        run: |
+          make build-images
+          KUBECONFIG=${PWD}/kubeconfig_managed_e2e make kind-deploy-controller-dev
+
+      - name: E2E tests that require the controller running in a cluster
+        run: |
+          export GOPATH=$(go env GOPATH)
+          KUBECONFIG=${PWD}/kubeconfig_managed make e2e-test-running-in-cluster
+
       - name: Test Coverage Verification
         if: ${{ github.event_name == 'pull_request' }}
         run: |
           make test-coverage
           make coverage-verify
 
-      - name: Verify Deployment Configuration
-        run: |
-          make build-images
-          KUBECONFIG=${PWD}/kubeconfig_managed_e2e make kind-deploy-controller-dev
-
       - name: Debug
         if: ${{ failure() }}
         run: |
diff --git a/Makefile b/Makefile
index 64c3c1f8..8983c69d 100644
--- a/Makefile
+++ b/Makefile
@@ -334,15 +334,19 @@ e2e-test: e2e-dependencies
 	$(GINKGO) -v --fail-fast --slow-spec-threshold=10s $(E2E_TEST_ARGS) test/e2e
 
 .PHONY: e2e-test-coverage
-e2e-test-coverage: E2E_TEST_ARGS = 
--json-report=report_e2e.json --label-filter='!hosted-mode' --output-dir=. +e2e-test-coverage: E2E_TEST_ARGS = --json-report=report_e2e.json --label-filter='!hosted-mode && !running-in-cluster' --output-dir=. e2e-test-coverage: e2e-run-instrumented e2e-test e2e-stop-instrumented .PHONY: e2e-test-hosted-mode-coverage -e2e-test-hosted-mode-coverage: E2E_TEST_ARGS = --json-report=report_e2e_hosted_mode.json --label-filter="hosted-mode" --output-dir=. +e2e-test-hosted-mode-coverage: E2E_TEST_ARGS = --json-report=report_e2e_hosted_mode.json --label-filter="hosted-mode && !running-in-cluster" --output-dir=. e2e-test-hosted-mode-coverage: COVERAGE_E2E_OUT = coverage_e2e_hosted_mode.out e2e-test-hosted-mode-coverage: export TARGET_KUBECONFIG_PATH = $(PWD)/kubeconfig_managed2 e2e-test-hosted-mode-coverage: e2e-run-instrumented e2e-test e2e-stop-instrumented +.PHONY: e2e-test-running-in-cluster +e2e-test-running-in-cluster: E2E_TEST_ARGS = --label-filter="running-in-cluster" -covermode=atomic --coverprofile=coverage_e2e_uninstall.out --coverpkg=open-cluster-management.io/config-policy-controller/pkg/triggeruninstall +e2e-test-running-in-cluster: e2e-test + .PHONY: e2e-build-instrumented e2e-build-instrumented: go test -covermode=atomic -coverpkg=$(shell cat go.mod | head -1 | cut -d ' ' -f 2)/... -c -tags e2e ./ -o build/_output/bin/$(IMG)-instrumented diff --git a/build/Dockerfile b/build/Dockerfile index 1c7a4e5f..a02f08fe 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -24,7 +24,7 @@ COPY --from=builder ${REPO_PATH}/build/_output/bin/${COMPONENT} ${OPERATOR} COPY --from=builder ${REPO_PATH}/build/bin /usr/local/bin RUN /usr/local/bin/user_setup -ENTRYPOINT ["/usr/local/bin/entrypoint"] +ENTRYPOINT ["/usr/local/bin/entrypoint", "controller"] RUN microdnf update && \ microdnf clean all diff --git a/controllers/configurationpolicy_controller.go b/controllers/configurationpolicy_controller.go index 2a60baae..16727e23 100644 --- a/controllers/configurationpolicy_controller.go +++ b/controllers/configurationpolicy_controller.go @@ -163,20 +163,11 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies( } const waiting = 10 * time.Minute - var exiting bool - // Loop twice after exit condition is received to account for race conditions and retries. 
- loopsAfterExit := 2 - for !exiting || (exiting && loopsAfterExit > 0) { - start := time.Now() - - select { - case <-ctx.Done(): - exiting = true - loopsAfterExit-- - default: - } + exiting := false + for !exiting { + start := time.Now() policiesList := policyv1.ConfigurationPolicyList{} var skipLoop bool @@ -206,28 +197,6 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies( skipLoop = true } - needDeploymentFinalizer := false - - for i := range policiesList.Items { - plc := policiesList.Items[i] - - if objHasFinalizer(&plc, pruneObjectFinalizer) { - needDeploymentFinalizer = true - - break - } - } - - if err := r.manageDeploymentFinalizer(needDeploymentFinalizer); err != nil { - if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) { - log.Info("Not managing the controller's deployment finalizer because it is running locally") - } else { - log.Error(err, "Failed to manage the controller's deployment finalizer, skipping loop") - - skipLoop = true - } - } - cleanupImmediately, err := r.cleanupImmediately() if err != nil { log.Error(err, "Failed to determine if it's time to cleanup immediately") @@ -281,6 +250,12 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies( log.V(2).Info("Sleeping before reprocessing the configuration policies", "seconds", sleepTime) time.Sleep(sleepTime) } + + select { + case <-ctx.Done(): + exiting = true + default: + } } } @@ -618,10 +593,10 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(plc policyv1.Configu // cleanupImmediately returns true when the cluster is in a state where configurationpolicies should // be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This is the -// case when the CRD or the controller's deployment are already being deleted. +// case when the controller is being uninstalled or the CRD is being deleted. func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) { - deployDeleting, deployErr := r.deploymentIsDeleting() - if deployErr == nil && deployDeleting { + beingUninstalled, beingUninstalledErr := r.isBeingUninstalled() + if beingUninstalledErr == nil && beingUninstalled { return true, nil } @@ -630,36 +605,16 @@ func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) { return true, nil } - if deployErr == nil && defErr == nil { + if beingUninstalledErr == nil && defErr == nil { // if either was deleting, we would've already returned. 
return false, nil } // At least one had an unexpected error, so the decision can't be made right now //nolint:errorlint // we can't choose just one of the errors to "correctly" wrap - return false, fmt.Errorf("deploymentIsDeleting error: '%v', definitionIsDeleting error: '%v'", - deployErr, defErr) -} - -func (r *ConfigurationPolicyReconciler) deploymentIsDeleting() (bool, error) { - key, keyErr := common.GetOperatorNamespacedName() - if keyErr != nil { - if errors.Is(keyErr, common.ErrNoNamespace) || errors.Is(keyErr, common.ErrRunLocal) { - // running locally - return false, nil - } - - return false, keyErr - } - - deployment := appsv1.Deployment{} - - err := r.Get(context.TODO(), key, &deployment) - if err != nil { - return false, err - } - - return deployment.DeletionTimestamp != nil, nil + return false, fmt.Errorf( + "isBeingUninstalled error: '%v', definitionIsDeleting error: '%v'", beingUninstalledErr, defErr, + ) } func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) { @@ -2808,32 +2763,23 @@ func convertPolicyStatusToString(plc *policyv1.ConfigurationPolicy) (results str return result } -func (r *ConfigurationPolicyReconciler) manageDeploymentFinalizer(shouldBeSet bool) error { +func (r *ConfigurationPolicyReconciler) isBeingUninstalled() (bool, error) { key, err := common.GetOperatorNamespacedName() if err != nil { - return err + // Running locally + if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) { + return false, nil + } + + return false, err } deployment := appsv1.Deployment{} if err := r.Client.Get(context.TODO(), key, &deployment); err != nil { - return err - } - - if objHasFinalizer(&deployment, pruneObjectFinalizer) { - if shouldBeSet { - return nil - } - - deployment.SetFinalizers(removeObjFinalizer(&deployment, pruneObjectFinalizer)) - } else { - if !shouldBeSet { - return nil - } - - deployment.SetFinalizers(addObjFinalizer(&deployment, pruneObjectFinalizer)) + return false, err } - return r.Update(context.TODO(), &deployment) + return deployment.Annotations[common.UninstallingAnnotation] == "true", nil } func recoverFlow() { diff --git a/controllers/configurationpolicy_utils.go b/controllers/configurationpolicy_utils.go index d1de1d78..da5e4bb6 100644 --- a/controllers/configurationpolicy_utils.go +++ b/controllers/configurationpolicy_utils.go @@ -549,7 +549,6 @@ func addObjFinalizer(obj metav1.Object, finalizer string) []string { return append(obj.GetFinalizers(), finalizer) } -// nolint: unparam func removeObjFinalizer(obj metav1.Object, finalizer string) []string { result := []string{} diff --git a/deploy/manager/manager.yaml b/deploy/manager/manager.yaml index fe675cff..7c295638 100644 --- a/deploy/manager/manager.yaml +++ b/deploy/manager/manager.yaml @@ -20,6 +20,7 @@ spec: command: - config-policy-controller args: + - "controller" - "--enable-lease=true" - "--log-level=2" - "--v=0" diff --git a/deploy/operator.yaml b/deploy/operator.yaml index f4587cd8..9fa9a025 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -45,6 +45,7 @@ spec: spec: containers: - args: + - controller - --enable-lease=true - --log-level=2 - --v=0 diff --git a/main.go b/main.go index c9e25bd8..4a6f7bcc 100644 --- a/main.go +++ b/main.go @@ -11,6 +11,7 @@ import ( "os" "runtime" "strings" + "time" "github.com/go-logr/zapr" "github.com/spf13/pflag" @@ -43,6 +44,7 @@ import ( policyv1 "open-cluster-management.io/config-policy-controller/api/v1" "open-cluster-management.io/config-policy-controller/controllers" 
"open-cluster-management.io/config-policy-controller/pkg/common" + "open-cluster-management.io/config-policy-controller/pkg/triggeruninstall" "open-cluster-management.io/config-policy-controller/version" ) @@ -74,51 +76,52 @@ func main() { } zflags.Bind(flag.CommandLine) - pflag.CommandLine.AddGoFlagSet(flag.CommandLine) + + controllerFlagSet := pflag.NewFlagSet("controller", pflag.ExitOnError) var clusterName, hubConfigPath, targetKubeConfig, metricsAddr, probeAddr string var frequency uint var decryptionConcurrency, evaluationConcurrency uint8 var enableLease, enableLeaderElection, legacyLeaderElection, enableMetrics bool - pflag.UintVar(&frequency, "update-frequency", 10, + controllerFlagSet.UintVar(&frequency, "update-frequency", 10, "The status update frequency (in seconds) of a mutation policy") - pflag.BoolVar(&enableLease, "enable-lease", false, + controllerFlagSet.BoolVar(&enableLease, "enable-lease", false, "If enabled, the controller will start the lease controller to report its status") - pflag.StringVar(&clusterName, "cluster-name", "acm-managed-cluster", "Name of the cluster") - pflag.StringVar(&hubConfigPath, "hub-kubeconfig-path", "/var/run/klusterlet/kubeconfig", + controllerFlagSet.StringVar(&clusterName, "cluster-name", "acm-managed-cluster", "Name of the cluster") + controllerFlagSet.StringVar(&hubConfigPath, "hub-kubeconfig-path", "/var/run/klusterlet/kubeconfig", "Path to the hub kubeconfig") - pflag.StringVar( + controllerFlagSet.StringVar( &targetKubeConfig, "target-kubeconfig-path", "", "A path to an alternative kubeconfig for policy evaluation and enforcement.", ) - pflag.StringVar( + controllerFlagSet.StringVar( &metricsAddr, "metrics-bind-address", "localhost:8383", "The address the metrics endpoint binds to.", ) - pflag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") - pflag.BoolVar(&enableLeaderElection, "leader-elect", true, + controllerFlagSet.StringVar( + &probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.", + ) + controllerFlagSet.BoolVar(&enableLeaderElection, "leader-elect", true, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") - pflag.BoolVar(&legacyLeaderElection, "legacy-leader-elect", false, + controllerFlagSet.BoolVar(&legacyLeaderElection, "legacy-leader-elect", false, "Use a legacy leader election method for controller manager instead of the lease API.") - pflag.Uint8Var( + controllerFlagSet.Uint8Var( &decryptionConcurrency, "decryption-concurrency", 5, "The max number of concurrent policy template decryptions", ) - pflag.Uint8Var( + controllerFlagSet.Uint8Var( &evaluationConcurrency, "evaluation-concurrency", // Set a low default to not add too much load to the Kubernetes API server in resource constrained deployments. 
2, "The max number of concurrent configuration policy evaluations", ) - pflag.BoolVar(&enableMetrics, "enable-metrics", true, "Disable custom metrics collection") - - pflag.Parse() + controllerFlagSet.BoolVar(&enableMetrics, "enable-metrics", true, "Disable custom metrics collection") ctrlZap, err := zflags.BuildForCtrl() if err != nil { @@ -142,6 +145,24 @@ func main() { klog.SetLogger(zapr.NewLogger(klogZap).WithName("klog")) } + subcommand := "" + if len(os.Args) >= 2 { + subcommand = os.Args[1] + } + + switch subcommand { + case "controller": + controllerFlagSet.AddGoFlagSet(flag.CommandLine) + _ = controllerFlagSet.Parse(os.Args[2:]) + case "trigger-uninstall": + handleTriggerUninstall() + + return + default: + fmt.Fprintln(os.Stderr, "expected 'controller' or 'trigger-uninstall' subcommands") + os.Exit(1) + } + if evaluationConcurrency < 1 { panic("The --evaluation-concurrency option cannot be less than 1") } @@ -372,3 +393,57 @@ func main() { os.Exit(1) } } + +func handleTriggerUninstall() { + triggerUninstallFlagSet := pflag.NewFlagSet("trigger-uninstall", pflag.ExitOnError) + + var deploymentName, deploymentNamespace, policyNamespace string + var timeoutSeconds uint + + triggerUninstallFlagSet.StringVar( + &deploymentName, "deployment-name", "config-policy-controller", "The name of the controller Deployment object", + ) + triggerUninstallFlagSet.StringVar( + &deploymentNamespace, + "deployment-namespace", + "open-cluster-management-agent-addon", + "The namespace of the controller Deployment object", + ) + triggerUninstallFlagSet.StringVar( + &policyNamespace, "policy-namespace", "", "The namespace of where ConfigurationPolicy objects are stored", + ) + triggerUninstallFlagSet.UintVar( + &timeoutSeconds, "timeout-seconds", 300, "The number of seconds before the operation is canceled", + ) + triggerUninstallFlagSet.AddGoFlagSet(flag.CommandLine) + + _ = triggerUninstallFlagSet.Parse(os.Args[2:]) + + if deploymentName == "" || deploymentNamespace == "" || policyNamespace == "" { + fmt.Fprintln(os.Stderr, "--deployment-name, --deployment-namespace, --policy-namespace must all have values") + os.Exit(1) + } + + if timeoutSeconds < 30 { + fmt.Fprintln(os.Stderr, "--timeout-seconds must be set to at least 30 seconds") + os.Exit(1) + } + + terminatingCtx := ctrl.SetupSignalHandler() + ctx, cancelCtx := context.WithDeadline(terminatingCtx, time.Now().Add(time.Duration(timeoutSeconds)*time.Second)) + + defer cancelCtx() + + // Get a config to talk to the apiserver + cfg, err := config.GetConfig() + if err != nil { + log.Error(err, "Failed to get config") + os.Exit(1) + } + + err = triggeruninstall.TriggerUninstall(ctx, cfg, deploymentName, deploymentNamespace, policyNamespace) + if err != nil { + klog.Errorf("Failed to trigger the uninstall due to the error: %s", err) + os.Exit(1) + } +} diff --git a/main_test.go b/main_test.go index 8d42d635..e929e2ec 100644 --- a/main_test.go +++ b/main_test.go @@ -15,8 +15,11 @@ import ( // TestRunMain wraps the main() function in order to build a test binary and collection coverage for // E2E/Integration tests. Controller CLI flags are also passed in here. func TestRunMain(t *testing.T) { + args := append([]string{os.Args[1], "controller"}, os.Args[2:]...) 
os.Args = append( - os.Args, "--leader-elect=false", fmt.Sprintf("--target-kubeconfig-path=%s", os.Getenv("TARGET_KUBECONFIG_PATH")), + args, + "--leader-elect=false", + fmt.Sprintf("--target-kubeconfig-path=%s", os.Getenv("TARGET_KUBECONFIG_PATH")), ) main() diff --git a/pkg/common/common.go b/pkg/common/common.go index 5b0b02c9..6b78a524 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -11,6 +11,8 @@ import ( "k8s.io/client-go/tools/record" ) +const UninstallingAnnotation string = "policy.open-cluster-management.io/uninstalling" + // CreateRecorder return recorder func CreateRecorder(kubeClient kubernetes.Interface, componentName string) (record.EventRecorder, error) { eventsScheme := runtime.NewScheme() diff --git a/pkg/triggeruninstall/triggeruninstall.go b/pkg/triggeruninstall/triggeruninstall.go new file mode 100644 index 00000000..5dd0b069 --- /dev/null +++ b/pkg/triggeruninstall/triggeruninstall.go @@ -0,0 +1,112 @@ +// Copyright Contributors to the Open Cluster Management project + +package triggeruninstall + +import ( + "context" + "fmt" + "time" + + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/klog" + + policyv1 "open-cluster-management.io/config-policy-controller/api/v1" + "open-cluster-management.io/config-policy-controller/pkg/common" +) + +// TriggerUninstall will add an annotation to the controller's Deployment indicating that the controller needs to +// prepare to be uninstalled. This function will run until all ConfigurationPolicy objects have no finalizers. +func TriggerUninstall( + ctx context.Context, config *rest.Config, deploymentName, deploymentNamespace, policyNamespace string, +) error { + client := kubernetes.NewForConfigOrDie(config) + dynamicClient := dynamic.NewForConfigOrDie(config) + + for { + klog.Info("Setting the Deployment uninstall annotation") + var err error + + select { + case <-ctx.Done(): + return fmt.Errorf("context canceled before the uninstallation preparation was complete") + default: + } + + deploymentRsrc := client.AppsV1().Deployments(deploymentNamespace) + + deployment, err := deploymentRsrc.Get(ctx, deploymentName, metav1.GetOptions{}) + if err != nil { + return err + } + + annotations := deployment.GetAnnotations() + annotations[common.UninstallingAnnotation] = "true" + deployment.SetAnnotations(annotations) + + _, err = deploymentRsrc.Update(ctx, deployment, metav1.UpdateOptions{}) + if err != nil { + if k8serrors.IsServerTimeout(err) || k8serrors.IsTimeout(err) || k8serrors.IsConflict(err) { + klog.Infof("Retrying setting the Deployment uninstall annotation due to error: %s", err) + + continue + } + + return err + } + + break + } + + configPolicyGVR := schema.GroupVersionResource{ + Group: policyv1.GroupVersion.Group, + Version: policyv1.GroupVersion.Version, + Resource: "configurationpolicies", + } + + for { + klog.Info("Checking if the uninstall preparation is complete") + + select { + case <-ctx.Done(): + return fmt.Errorf("context canceled before the uninstallation preparation was complete") + default: + } + + configPolicies, err := dynamicClient.Resource(configPolicyGVR).Namespace(policyNamespace).List( + ctx, metav1.ListOptions{}, + ) + if err != nil { + if k8serrors.IsServerTimeout(err) || k8serrors.IsTimeout(err) { + klog.Infof("Retrying listing the ConfigurationPolicy objects due to error: %s", err) + + continue + } + + return 
err + } + + cleanedUp := true + + for _, configPolicy := range configPolicies.Items { + if len(configPolicy.GetFinalizers()) != 0 { + cleanedUp = false + + break + } + } + + if cleanedUp { + break + } + + klog.Info("The uninstall preparation is not complete. Sleeping two seconds before checking again.") + time.Sleep(2 * time.Second) + } + + return nil +} diff --git a/test/e2e/case29_trigger_uninstall_test.go b/test/e2e/case29_trigger_uninstall_test.go new file mode 100644 index 00000000..617a0946 --- /dev/null +++ b/test/e2e/case29_trigger_uninstall_test.go @@ -0,0 +1,121 @@ +// Copyright Contributors to the Open Cluster Management project + +package e2e + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "open-cluster-management.io/config-policy-controller/pkg/common" + "open-cluster-management.io/config-policy-controller/pkg/triggeruninstall" + "open-cluster-management.io/config-policy-controller/test/utils" +) + +// This test only works when the controller is running in the cluster. +var _ = Describe("Clean up during uninstalls", Label("running-in-cluster"), Ordered, func() { + const ( + configMapName string = "case29-trigger-uninstall" + deploymentName string = "config-policy-controller" + deploymentNamespace string = "open-cluster-management-agent-addon" + policyName string = "case29-trigger-uninstall" + policy2Name string = "case29-trigger-uninstall2" + policyYAMLPath string = "../resources/case29_trigger_uninstall/policy.yaml" + policy2YAMLPath string = "../resources/case29_trigger_uninstall/policy2.yaml" + pruneObjectFinalizer string = "policy.open-cluster-management.io/delete-related-objects" + ) + + It("verifies that finalizers are removed when being uninstalled", func() { + By("Creating two configuration policies with pruneObjectBehavior") + utils.Kubectl("apply", "-f", policyYAMLPath, "-n", testNamespace) + utils.Kubectl("apply", "-f", policy2YAMLPath, "-n", testNamespace) + + By("Verifying that the configuration policies are compliant and have finalizers") + Eventually(func(g Gomega) { + policy := utils.GetWithTimeout( + clientManagedDynamic, gvrConfigPolicy, policyName, testNamespace, true, defaultTimeoutSeconds, + ) + g.Expect(utils.GetComplianceState(policy)).To(Equal("Compliant")) + + g.Expect(policy.GetFinalizers()).To(ContainElement(pruneObjectFinalizer)) + }, defaultTimeoutSeconds, 1).Should(Succeed()) + + Eventually(func(g Gomega) { + policy2 := utils.GetWithTimeout( + clientManagedDynamic, gvrConfigPolicy, policy2Name, testNamespace, true, defaultTimeoutSeconds, + ) + g.Expect(utils.GetComplianceState(policy2)).To(Equal("Compliant")) + + g.Expect(policy2.GetFinalizers()).To(ContainElement(pruneObjectFinalizer)) + }, defaultTimeoutSeconds, 1).Should(Succeed()) + + By("Triggering an uninstall") + config, err := LoadConfig("", kubeconfigManaged, "") + Expect(err).To(BeNil()) + + ctx, ctxCancel := context.WithDeadline( + context.Background(), + // Cancel the context after the default timeout seconds to avoid the test running forever if it doesn't + // exit cleanly before then. 
+			time.Now().Add(time.Duration(defaultTimeoutSeconds)*time.Second),
+		)
+		defer ctxCancel()
+
+		err = triggeruninstall.TriggerUninstall(ctx, config, deploymentName, deploymentNamespace, testNamespace)
+		Expect(err).To(BeNil())
+
+		By("Verifying that the uninstall annotation was set on the Deployment")
+		deployment, err := clientManaged.AppsV1().Deployments(deploymentNamespace).Get(
+			context.TODO(), deploymentName, metav1.GetOptions{},
+		)
+		Expect(err).To(BeNil())
+		Expect(deployment.GetAnnotations()).To(HaveKeyWithValue(common.UninstallingAnnotation, "true"))
+
+		By("Verifying that the ConfigurationPolicy finalizers have been removed")
+		policy := utils.GetWithTimeout(
+			clientManagedDynamic, gvrConfigPolicy, policyName, testNamespace, true, defaultTimeoutSeconds,
+		)
+		Expect(policy.GetFinalizers()).To(HaveLen(0))
+
+		policy2 := utils.GetWithTimeout(
+			clientManagedDynamic, gvrConfigPolicy, policy2Name, testNamespace, true, defaultTimeoutSeconds,
+		)
+		Expect(policy2.GetFinalizers()).To(HaveLen(0))
+	})
+
+	AfterAll(func() {
+		deleteConfigPolicies([]string{policyName, policy2Name})
+
+		err := clientManaged.CoreV1().ConfigMaps("default").Delete(
+			context.TODO(), configMapName, metav1.DeleteOptions{},
+		)
+		if !k8serrors.IsNotFound(err) {
+			Expect(err).To(BeNil())
+		}
+
+		// Use an Eventually in case there are update conflicts and there needs to be a retry
+		Eventually(func(g Gomega) {
+			deployment, err := clientManaged.AppsV1().Deployments(deploymentNamespace).Get(
+				context.TODO(), deploymentName, metav1.GetOptions{},
+			)
+			g.Expect(err).To(BeNil())
+
+			annotations := deployment.GetAnnotations()
+			if _, ok := annotations[common.UninstallingAnnotation]; !ok {
+				return
+			}
+
+			delete(annotations, common.UninstallingAnnotation)
+			deployment.SetAnnotations(annotations)
+
+			_, err = clientManaged.AppsV1().Deployments(deploymentNamespace).Update(
+				context.TODO(), deployment, metav1.UpdateOptions{},
+			)
+			g.Expect(err).To(BeNil())
+		}, defaultTimeoutSeconds, 1).Should(Succeed())
+	})
+})
diff --git a/test/resources/case29_trigger_uninstall/policy.yaml b/test/resources/case29_trigger_uninstall/policy.yaml
new file mode 100644
index 00000000..2880cd3f
--- /dev/null
+++ b/test/resources/case29_trigger_uninstall/policy.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy.open-cluster-management.io/v1
+kind: ConfigurationPolicy
+metadata:
+  name: case29-trigger-uninstall
+spec:
+  remediationAction: enforce
+  pruneObjectBehavior: DeleteAll
+  object-templates:
+    - complianceType: musthave
+      objectDefinition:
+        apiVersion: v1
+        kind: ConfigMap
+        metadata:
+          name: case29-trigger-uninstall
+          namespace: default
+        data:
+          city: Raleigh
diff --git a/test/resources/case29_trigger_uninstall/policy2.yaml b/test/resources/case29_trigger_uninstall/policy2.yaml
new file mode 100644
index 00000000..ff7596bb
--- /dev/null
+++ b/test/resources/case29_trigger_uninstall/policy2.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy.open-cluster-management.io/v1
+kind: ConfigurationPolicy
+metadata:
+  name: case29-trigger-uninstall2
+spec:
+  remediationAction: enforce
+  pruneObjectBehavior: DeleteAll
+  object-templates:
+    - complianceType: musthave
+      objectDefinition:
+        apiVersion: v1
+        kind: ConfigMap
+        metadata:
+          name: case29-trigger-uninstall
+          namespace: default
+        data:
+          state: NC
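
For reference, below is a minimal sketch of how the uninstall preparation could be driven by calling pkg/triggeruninstall directly rather than through the `trigger-uninstall` subcommand. It simply mirrors handleTriggerUninstall above; the Deployment name, namespace, and five-minute timeout match the subcommand's flag defaults, while "cluster1" is a hypothetical namespace holding the ConfigurationPolicy objects. How the Policy Addon controller actually launches the uninstall Pod is handled in its own repository and is not shown here.

// Illustrative sketch only: it mirrors what the "trigger-uninstall"
// subcommand does in handleTriggerUninstall, for a caller that wants to
// invoke pkg/triggeruninstall directly.
package main

import (
	"context"
	"time"

	"k8s.io/klog"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client/config"

	"open-cluster-management.io/config-policy-controller/pkg/triggeruninstall"
)

func main() {
	// Stop on SIGTERM/SIGINT and give up after five minutes, matching the
	// subcommand's --timeout-seconds default of 300.
	ctx, cancel := context.WithTimeout(ctrl.SetupSignalHandler(), 5*time.Minute)
	defer cancel()

	// Get a config to talk to the apiserver, as handleTriggerUninstall does.
	cfg, err := config.GetConfig()
	if err != nil {
		klog.Fatalf("Failed to get a Kubernetes client config: %s", err)
	}

	// The Deployment name and namespace match the subcommand's flag defaults;
	// "cluster1" is a placeholder for the namespace where the
	// ConfigurationPolicy objects are stored.
	err = triggeruninstall.TriggerUninstall(
		ctx, cfg, "config-policy-controller", "open-cluster-management-agent-addon", "cluster1",
	)
	if err != nil {
		klog.Fatalf("Failed to trigger the uninstall: %s", err)
	}
}

Once TriggerUninstall returns, every ConfigurationPolicy in that namespace has had its finalizers removed, so the caller (per the commit message, the Policy Addon controller's uninstall Pod) can let the ManagedClusterAddOn finalizer be dropped and the uninstall proceed.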