-
Notifications
You must be signed in to change notification settings - Fork 923
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a clusterEviction plugin #3469
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package clustereviction | ||
|
||
import ( | ||
"context" | ||
|
||
"k8s.io/klog/v2" | ||
|
||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" | ||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" | ||
"github.com/karmada-io/karmada/pkg/scheduler/framework" | ||
"github.com/karmada-io/karmada/pkg/util/helper" | ||
) | ||
|
||
const ( | ||
// Name is the name of the plugin used in the plugin registry and configurations. | ||
Name = "ClusterEviction" | ||
) | ||
|
||
// ClusterEviction is a plugin that checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. | ||
type ClusterEviction struct{} | ||
|
||
var _ framework.FilterPlugin = &ClusterEviction{} | ||
|
||
// New instantiates the ClusterEviction plugin. | ||
func New() (framework.Plugin, error) { | ||
return &ClusterEviction{}, nil | ||
} | ||
|
||
// Name returns the plugin name. | ||
func (p *ClusterEviction) Name() string { | ||
return Name | ||
} | ||
|
||
// Filter checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. | ||
func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, _ *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result { | ||
if helper.ClusterInGracefulEvictionTasks(bindingSpec.GracefulEvictionTasks, cluster.Name) { | ||
klog.V(2).Infof("Cluster(%s) is in the process of eviction.", cluster.Name) | ||
return framework.NewResult(framework.Unschedulable, "cluster(s) is in the process of eviction") | ||
} | ||
|
||
return framework.NewResult(framework.Success) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ import ( | |
"k8s.io/apimachinery/pkg/util/rand" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/klog/v2" | ||
"k8s.io/utils/pointer" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
|
||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" | ||
|
@@ -141,6 +142,106 @@ var _ = framework.SerialDescribe("failover testing", func() { | |
}) | ||
}) | ||
}) | ||
|
||
ginkgo.Context("Taint cluster testing", func() { | ||
var policyNamespace, policyName string | ||
var deploymentNamespace, deploymentName string | ||
var deployment *appsv1.Deployment | ||
var taint corev1.Taint | ||
var maxGroups, minGroups, numOfFailedClusters int | ||
var policy *policyv1alpha1.PropagationPolicy | ||
maxGroups = 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maxGroups, minGroups, numOfFailedClusters := 1,1,1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. /cc @XiShanYongYe-Chang. I follow the former code style. To be honest, I do not know which style is better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's just a code style, any writing is OK. Keep the overall consistency will be nice. |
||
minGroups = 1 | ||
numOfFailedClusters = 1 | ||
|
||
ginkgo.BeforeEach(func() { | ||
policyNamespace = testNamespace | ||
policyName = deploymentNamePrefix + rand.String(RandomStrLength) | ||
deploymentNamespace = testNamespace | ||
deploymentName = policyName | ||
deployment = testhelper.NewDeployment(deploymentNamespace, deploymentName) | ||
|
||
policy = testhelper.NewPropagationPolicy(policyNamespace, policyName, []policyv1alpha1.ResourceSelector{ | ||
{ | ||
APIVersion: deployment.APIVersion, | ||
Kind: deployment.Kind, | ||
Name: deployment.Name, | ||
}, | ||
}, policyv1alpha1.Placement{ | ||
ClusterAffinity: &policyv1alpha1.ClusterAffinity{ | ||
ClusterNames: framework.ClusterNames(), | ||
}, | ||
ClusterTolerations: []corev1.Toleration{ | ||
{ | ||
Key: "fail-test", | ||
Effect: corev1.TaintEffectNoExecute, | ||
Operator: corev1.TolerationOpExists, | ||
TolerationSeconds: pointer.Int64(3), | ||
}, | ||
}, | ||
SpreadConstraints: []policyv1alpha1.SpreadConstraint{ | ||
{ | ||
SpreadByField: policyv1alpha1.SpreadByFieldCluster, | ||
MaxGroups: maxGroups, | ||
MinGroups: minGroups, | ||
}, | ||
}, | ||
}) | ||
|
||
taint = corev1.Taint{ | ||
Key: "fail-test", | ||
Effect: corev1.TaintEffectNoExecute, | ||
} | ||
}) | ||
|
||
ginkgo.BeforeEach(func() { | ||
framework.CreatePropagationPolicy(karmadaClient, policy) | ||
framework.CreateDeployment(kubeClient, deployment) | ||
ginkgo.DeferCleanup(func() { | ||
framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name) | ||
framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name) | ||
}) | ||
}) | ||
|
||
ginkgo.It("taint cluster", func() { | ||
var disabledClusters []string | ||
targetClusterNames := framework.ExtractTargetClustersFrom(controlPlaneClient, deployment) | ||
ginkgo.By("taint one cluster", func() { | ||
temp := numOfFailedClusters | ||
for _, targetClusterName := range targetClusterNames { | ||
if temp > 0 { | ||
klog.Infof("Taint one cluster(%s).", targetClusterName) | ||
err := taintCluster(controlPlaneClient, targetClusterName, taint) | ||
gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) | ||
disabledClusters = append(disabledClusters, targetClusterName) | ||
temp-- | ||
} | ||
} | ||
}) | ||
|
||
ginkgo.By("check whether deployment of failed cluster is rescheduled to other available cluster", func() { | ||
gomega.Eventually(func() int { | ||
targetClusterNames = framework.ExtractTargetClustersFrom(controlPlaneClient, deployment) | ||
for _, targetClusterName := range targetClusterNames { | ||
// the target cluster should be overwritten to another available cluster | ||
if !testhelper.IsExclude(targetClusterName, disabledClusters) { | ||
return 0 | ||
} | ||
} | ||
|
||
return len(targetClusterNames) | ||
}, pollTimeout, pollInterval).Should(gomega.Equal(minGroups)) | ||
}) | ||
|
||
ginkgo.By("recover not ready cluster", func() { | ||
for _, disabledCluster := range disabledClusters { | ||
fmt.Printf("cluster %s is waiting for recovering\n", disabledCluster) | ||
err := recoverTaintedCluster(controlPlaneClient, disabledCluster, taint) | ||
gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) | ||
} | ||
}) | ||
}) | ||
}) | ||
}) | ||
|
||
// disableCluster will set wrong API endpoint of current cluster | ||
|
@@ -167,6 +268,49 @@ func disableCluster(c client.Client, clusterName string) error { | |
return err | ||
} | ||
|
||
// taintCluster will taint cluster | ||
func taintCluster(c client.Client, clusterName string, taint corev1.Taint) error { | ||
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) { | ||
clusterObj := &clusterv1alpha1.Cluster{} | ||
if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { | ||
if apierrors.IsConflict(err) { | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
clusterObj.Spec.Taints = append(clusterObj.Spec.Taints, taint) | ||
if err := c.Update(context.TODO(), clusterObj); err != nil { | ||
if apierrors.IsConflict(err) { | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
return true, nil | ||
}) | ||
return err | ||
} | ||
|
||
// recoverTaintedCluster will recover the taint of the disabled cluster | ||
func recoverTaintedCluster(c client.Client, clusterName string, taint corev1.Taint) error { | ||
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) { | ||
clusterObj := &clusterv1alpha1.Cluster{} | ||
if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { | ||
if apierrors.IsConflict(err) { | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
if err := helper.UpdateClusterControllerTaint(context.TODO(), c, nil, []*corev1.Taint{&taint}, clusterObj); err != nil { | ||
if apierrors.IsConflict(err) { | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
return true, nil | ||
}) | ||
return err | ||
} | ||
|
||
// recoverCluster will recover API endpoint of the disable cluster | ||
func recoverCluster(c client.Client, clusterName string, originalAPIEndpoint string) error { | ||
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this plugin only take effect when the
Application Failover
feature is enabled?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not only application failover, but also the current cluster failover. See comments: #3456 (comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this conflict with the
TaintToleration
plugin?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there will be any conflict. The plugins will execute sequentially, ensuring that the final result does not include the cluster in eviction tasks, as intended.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok.