From 336b4f28200a1881e4500eaea6526d2e4258a74b Mon Sep 17 00:00:00 2001 From: zhzhuang-zju Date: Mon, 4 Nov 2024 09:59:11 +0800 Subject: [PATCH] enhanced forced deletion on karmadactl unjoin Signed-off-by: zhzhuang-zju --- pkg/karmadactl/unjoin/unjoin.go | 47 +--------- pkg/karmadactl/util/cluster.go | 151 ++++++++++++++++++++++++++++++++ test/e2e/rescheduling_test.go | 3 + 3 files changed, 158 insertions(+), 43 deletions(-) create mode 100644 pkg/karmadactl/util/cluster.go diff --git a/pkg/karmadactl/unjoin/unjoin.go b/pkg/karmadactl/unjoin/unjoin.go index 2d5af8fcfeb1..a32c1086b027 100644 --- a/pkg/karmadactl/unjoin/unjoin.go +++ b/pkg/karmadactl/unjoin/unjoin.go @@ -17,15 +17,11 @@ limitations under the License. package unjoin import ( - "context" "fmt" "time" "github.com/spf13/cobra" "github.com/spf13/pflag" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" kubeclient "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/klog/v2" @@ -65,7 +61,7 @@ func NewCmdUnjoin(f cmdutil.Factory, parentCommand string) *cobra.Command { Example: fmt.Sprintf(unjoinExample, parentCommand), SilenceUsage: true, DisableFlagsInUseLine: true, - RunE: func(cmd *cobra.Command, args []string) error { + RunE: func(_ *cobra.Command, args []string) error { if err := opts.Complete(args); err != nil { return err } @@ -149,7 +145,7 @@ func (j *CommandUnjoinOption) AddFlags(flags *pflag.FlagSet) { flags.StringVar(&j.ClusterKubeConfig, "cluster-kubeconfig", "", "Path of the cluster's kubeconfig.") flags.BoolVar(&j.forceDeletion, "force", false, - "Delete cluster and secret resources even if resources in the cluster targeted for unjoin are not removed successfully.") + "When set, the unjoin command will attempt to clean up resources in the member cluster before deleting the Cluster object. If the cleanup fails within the timeout period, the Cluster object will still be deleted, potentially leaving some resources behind in the member cluster.") flags.DurationVar(&j.Wait, "wait", 60*time.Second, "wait for the unjoin command execution process(default 60s), if there is no success after this time, timeout will be returned.") flags.BoolVar(&j.DryRun, "dry-run", false, "Run the command in dry-run mode, without making any server requests.") } @@ -183,9 +179,10 @@ func (j *CommandUnjoinOption) Run(f cmdutil.Factory) error { // RunUnJoinCluster unJoin the cluster from karmada. func (j *CommandUnjoinOption) RunUnJoinCluster(controlPlaneRestConfig, clusterConfig *rest.Config) error { controlPlaneKarmadaClient := karmadaclientset.NewForConfigOrDie(controlPlaneRestConfig) + controlPlaneKubeClient := kubeclient.NewForConfigOrDie(controlPlaneRestConfig) // delete the cluster object in host cluster that associates the unjoining cluster - err := j.deleteClusterObject(controlPlaneKarmadaClient) + err := cmdutil.DeleteClusterObject(controlPlaneKubeClient, controlPlaneKarmadaClient, j.ClusterName, j.Wait, j.DryRun, j.forceDeletion) if err != nil { klog.Errorf("Failed to delete cluster object. cluster name: %s, error: %v", j.ClusterName, err) return err @@ -223,42 +220,6 @@ func (j *CommandUnjoinOption) RunUnJoinCluster(controlPlaneRestConfig, clusterCo return nil } -// deleteClusterObject delete the cluster object in host cluster that associates the unjoining cluster -func (j *CommandUnjoinOption) deleteClusterObject(controlPlaneKarmadaClient *karmadaclientset.Clientset) error { - if j.DryRun { - return nil - } - - err := controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Delete(context.TODO(), j.ClusterName, metav1.DeleteOptions{}) - if apierrors.IsNotFound(err) { - return fmt.Errorf("no cluster object %s found in karmada control Plane", j.ClusterName) - } - if err != nil { - klog.Errorf("Failed to delete cluster object. cluster name: %s, error: %v", j.ClusterName, err) - return err - } - - // make sure the given cluster object has been deleted - err = wait.Poll(1*time.Second, j.Wait, func() (done bool, err error) { - _, err = controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Get(context.TODO(), j.ClusterName, metav1.GetOptions{}) - if apierrors.IsNotFound(err) { - return true, nil - } - if err != nil { - klog.Errorf("Failed to get cluster %s. err: %v", j.ClusterName, err) - return false, err - } - klog.Infof("Waiting for the cluster object %s to be deleted", j.ClusterName) - return false, nil - }) - if err != nil { - klog.Errorf("Failed to delete cluster object. cluster name: %s, error: %v", j.ClusterName, err) - return err - } - - return nil -} - // deleteRBACResources deletes the cluster role, cluster rolebindings from the unjoining cluster. func deleteRBACResources(clusterKubeClient kubeclient.Interface, unjoiningClusterName string, forceDeletion, dryRun bool) error { if dryRun { diff --git a/pkg/karmadactl/util/cluster.go b/pkg/karmadactl/util/cluster.go new file mode 100644 index 000000000000..f5da3f4fab35 --- /dev/null +++ b/pkg/karmadactl/util/cluster.go @@ -0,0 +1,151 @@ +/* +Copyright 2024 The Karmada Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "context" + "fmt" + "time" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + kubeclient "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + karmadaclientset "github.com/karmada-io/karmada/pkg/generated/clientset/versioned" + "github.com/karmada-io/karmada/pkg/util" + "github.com/karmada-io/karmada/pkg/util/names" +) + +// DeleteClusterObject deletes the cluster object from the Karmada control plane. +func DeleteClusterObject(controlPlaneKubeClient kubeclient.Interface, controlPlaneKarmadaClient karmadaclientset.Interface, clusterName string, + timeout time.Duration, dryRun bool, forceDeletion bool) error { + if dryRun { + return nil + } + + err := controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Delete(context.TODO(), clusterName, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return fmt.Errorf("no cluster object %s found in karmada control Plane", clusterName) + } + if err != nil { + klog.Errorf("Failed to delete cluster object. cluster name: %s, error: %v", clusterName, err) + return err + } + + // make sure the given cluster object has been deleted. + // If the operation times out and `forceDeletion` is true, then force deletion begins, which involves sequentially deleting the `work`, `executionSpace`, and `cluster` finalizers. + err = wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, timeout, false, func(context.Context) (done bool, err error) { + _, err = controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Get(context.TODO(), clusterName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return true, nil + } + if err != nil { + klog.Errorf("Failed to get cluster %s. err: %v", clusterName, err) + return false, err + } + klog.Infof("Waiting for the cluster object %s to be deleted", clusterName) + return false, nil + }) + + // If the Cluster object not be deleted within the timeout period, it is likely due to the resources in the member + // cluster can not be cleaned up. With the option force deletion, we will try to clean up the Cluster object by + // removing the finalizers from related resources. This behavior may result in some resources remain in the member + // clusters. + if err != nil && forceDeletion { + klog.Warningf("Deleting the cluster object timed out. cluster name: %s, error: %v", clusterName, err) + klog.Infof("Start forced deletion. cluster name: %s", clusterName) + executionSpaceName := names.GenerateExecutionSpaceName(clusterName) + err = removeWorkFinalizer(executionSpaceName, controlPlaneKarmadaClient) + if err != nil { + klog.Errorf("Force deletion. Failed to remove the finalizer of Work, error: %v", err) + } + + err = removeExecutionSpaceFinalizer(executionSpaceName, controlPlaneKubeClient) + if err != nil { + klog.Errorf("Force deletion. Failed to remove the finalizer of Namespace(%s), error: %v", executionSpaceName, err) + } + + err = removeClusterFinalizer(clusterName, controlPlaneKarmadaClient) + if err != nil { + klog.Errorf("Force deletion. Failed to remove the finalizer of Cluster(%s), error: %v", clusterName, err) + } + + klog.Infof("Forced deletion is complete.") + return nil + } + + return err +} + +// removeWorkFinalizer removes the finalizer of works in the executionSpace. +func removeWorkFinalizer(executionSpaceName string, controlPlaneKarmadaClient karmadaclientset.Interface) error { + list, err := controlPlaneKarmadaClient.WorkV1alpha1().Works(executionSpaceName).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list work in executionSpace %s", executionSpaceName) + } + + for i := range list.Items { + work := &list.Items[i] + if !controllerutil.ContainsFinalizer(work, util.ExecutionControllerFinalizer) { + continue + } + controllerutil.RemoveFinalizer(work, util.ExecutionControllerFinalizer) + _, err = controlPlaneKarmadaClient.WorkV1alpha1().Works(executionSpaceName).Update(context.TODO(), work, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to remove the finalizer of work(%s/%s)", executionSpaceName, work.GetName()) + } + } + return nil +} + +// removeExecutionSpaceFinalizer removes the finalizer of executionSpace. +func removeExecutionSpaceFinalizer(executionSpaceName string, controlPlaneKubeClient kubeclient.Interface) error { + executionSpace, err := controlPlaneKubeClient.CoreV1().Namespaces().Get(context.TODO(), executionSpaceName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get Namespace(%s)", executionSpaceName) + } + + if !controllerutil.ContainsFinalizer(executionSpace, string(corev1.FinalizerKubernetes)) { + return nil + } + + controllerutil.RemoveFinalizer(executionSpace, "kubernetes") + _, err = controlPlaneKubeClient.CoreV1().Namespaces().Update(context.TODO(), executionSpace, metav1.UpdateOptions{}) + + return err +} + +// removeClusterFinalizer removes the finalizer of cluster object. +func removeClusterFinalizer(clusterName string, controlPlaneKarmadaClient karmadaclientset.Interface) error { + cluster, err := controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Get(context.TODO(), clusterName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get Cluster(%s)", clusterName) + } + + if !controllerutil.ContainsFinalizer(cluster, util.ClusterControllerFinalizer) { + return nil + } + + controllerutil.RemoveFinalizer(cluster, util.ClusterControllerFinalizer) + _, err = controlPlaneKarmadaClient.ClusterV1alpha1().Clusters().Update(context.TODO(), cluster, metav1.UpdateOptions{}) + + return err +} diff --git a/test/e2e/rescheduling_test.go b/test/e2e/rescheduling_test.go index 6e83edf8461f..5f9d67ef4373 100644 --- a/test/e2e/rescheduling_test.go +++ b/test/e2e/rescheduling_test.go @@ -19,6 +19,7 @@ package e2e import ( "fmt" "os" + "time" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -136,6 +137,7 @@ var _ = ginkgo.Describe("[cluster unjoined] reschedule testing", func() { opts := unjoin.CommandUnjoinOption{ ClusterNamespace: "karmada-cluster", ClusterName: newClusterName, + Wait: 60 * time.Second, } err := opts.Run(f) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) @@ -194,6 +196,7 @@ var _ = ginkgo.Describe("[cluster joined] reschedule testing", func() { opts := unjoin.CommandUnjoinOption{ ClusterNamespace: "karmada-cluster", ClusterName: newClusterName, + Wait: 60 * time.Second, } err := opts.Run(f) gomega.Expect(err).ShouldNot(gomega.HaveOccurred())