Skip to content

Commit

Permalink
add a clusterEvicted plugin
Browse files Browse the repository at this point in the history
Signed-off-by: Poor12 <shentiecheng@huawei.com>
  • Loading branch information
Poor12 committed Apr 27, 2023
1 parent 0f84dd7 commit 6c33e76
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package clustereviction

import (
"context"

"k8s.io/klog/v2"

clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
"github.com/karmada-io/karmada/pkg/scheduler/framework"
"github.com/karmada-io/karmada/pkg/util/helper"
)

const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "ClusterEviction"
)

// ClusterEviction is a plugin that checks if the target cluster is in the GracefulEvictionTasks which means it has been evicted.
type ClusterEviction struct{}

var _ framework.FilterPlugin = &ClusterEviction{}

// New instantiates the APIEnablement plugin.
func New() (framework.Plugin, error) {
return &ClusterEviction{}, nil
}

// Name returns the plugin name.
func (p *ClusterEviction) Name() string {
return Name
}

// Filter checks if the target cluster is in the GracefulEvictionTasks which means it has been evicted.
func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, _ *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result {
if helper.CheckIfClusterEvicted(bindingSpec.GracefulEvictionTasks, cluster.Name) {
klog.V(2).Infof("Cluster(%s) has been evicted before.", cluster.Name)
return framework.NewResult(framework.Unschedulable, "cluster(s) has been evicted before")
}

return framework.NewResult(framework.Success)
}
2 changes: 2 additions & 0 deletions pkg/scheduler/framework/plugins/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package plugins
import (
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/apienablement"
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/clusteraffinity"
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/clustereviction"
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/clusterlocality"
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/spreadconstraint"
"github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/tainttoleration"
Expand All @@ -17,5 +18,6 @@ func NewInTreeRegistry() runtime.Registry {
clusteraffinity.Name: clusteraffinity.New,
spreadconstraint.Name: spreadconstraint.New,
clusterlocality.Name: clusterlocality.New,
clustereviction.Name: clustereviction.New,
}
}
12 changes: 12 additions & 0 deletions pkg/util/helper/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package helper

import (
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
)

// IsAPIEnabled checks if target API (or CRD) referencing by groupVersion and kind has been installed.
Expand All @@ -21,3 +22,14 @@ func IsAPIEnabled(APIEnablements []clusterv1alpha1.APIEnablement, groupVersion s

return false
}

// CheckIfClusterEvicted checks if the target cluster is in the GracefulEvictionTasks which means it has been evicted.
func CheckIfClusterEvicted(gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask, clusterName string) bool {
for _, task := range gracefulEvictionTasks {
if task.FromCluster == clusterName {
return true
}
}

return false
}
46 changes: 46 additions & 0 deletions pkg/util/helper/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
)

func TestIsAPIEnabled(t *testing.T) {
Expand Down Expand Up @@ -72,3 +73,48 @@ func TestIsAPIEnabled(t *testing.T) {
})
}
}

func TestCheckIfClusterEvicted(t *testing.T) {
gracefulEvictionTasks := []workv1alpha2.GracefulEvictionTask{
{
FromCluster: ClusterMember1,
Producer: workv1alpha2.EvictionProducerTaintManager,
Reason: workv1alpha2.EvictionReasonTaintUntolerated,
},
{
FromCluster: ClusterMember2,
Producer: workv1alpha2.EvictionProducerTaintManager,
Reason: workv1alpha2.EvictionReasonTaintUntolerated,
},
}

tests := []struct {
name string
gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask
targetCluster string
expect bool
}{
{
name: "targetCluster has been evicted",
gracefulEvictionTasks: gracefulEvictionTasks,
targetCluster: ClusterMember1,
expect: true,
},
{
name: "targetCluster has been not evicted",
gracefulEvictionTasks: gracefulEvictionTasks,
targetCluster: ClusterMember3,
expect: false,
},
}

for _, test := range tests {
tc := test
t.Run(tc.name, func(t *testing.T) {
result := CheckIfClusterEvicted(tc.gracefulEvictionTasks, tc.targetCluster)
if result != tc.expect {
t.Errorf("expected: %v, but got: %v", tc.expect, result)
}
})
}
}
144 changes: 144 additions & 0 deletions test/e2e/failover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
Expand Down Expand Up @@ -141,6 +142,106 @@ var _ = framework.SerialDescribe("failover testing", func() {
})
})
})

ginkgo.Context("Taint cluster testing", func() {
var policyNamespace, policyName string
var deploymentNamespace, deploymentName string
var deployment *appsv1.Deployment
var taint corev1.Taint
var maxGroups, minGroups, numOfFailedClusters int
var policy *policyv1alpha1.PropagationPolicy
maxGroups = 1
minGroups = 1
numOfFailedClusters = 1

ginkgo.BeforeEach(func() {
policyNamespace = testNamespace
policyName = deploymentNamePrefix + rand.String(RandomStrLength)
deploymentNamespace = testNamespace
deploymentName = policyName
deployment = testhelper.NewDeployment(deploymentNamespace, deploymentName)

policy = testhelper.NewPropagationPolicy(policyNamespace, policyName, []policyv1alpha1.ResourceSelector{
{
APIVersion: deployment.APIVersion,
Kind: deployment.Kind,
Name: deployment.Name,
},
}, policyv1alpha1.Placement{
ClusterAffinity: &policyv1alpha1.ClusterAffinity{
ClusterNames: framework.ClusterNames(),
},
ClusterTolerations: []corev1.Toleration{
{
Key: "fail-test",
Effect: corev1.TaintEffectNoExecute,
Operator: corev1.TolerationOpExists,
TolerationSeconds: pointer.Int64(3),
},
},
SpreadConstraints: []policyv1alpha1.SpreadConstraint{
{
SpreadByField: policyv1alpha1.SpreadByFieldCluster,
MaxGroups: maxGroups,
MinGroups: minGroups,
},
},
})

taint = corev1.Taint{
Key: "fail-test",
Effect: corev1.TaintEffectNoExecute,
}
})

ginkgo.BeforeEach(func() {
framework.CreatePropagationPolicy(karmadaClient, policy)
framework.CreateDeployment(kubeClient, deployment)
ginkgo.DeferCleanup(func() {
framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name)
framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name)
})
})

ginkgo.It("taint cluster", func() {
var disabledClusters []string
targetClusterNames := framework.ExtractTargetClustersFrom(controlPlaneClient, deployment)
ginkgo.By("taint one cluster", func() {
temp := numOfFailedClusters
for _, targetClusterName := range targetClusterNames {
if temp > 0 {
klog.Infof("Taint one cluster(%s).", targetClusterName)
err := taintCluster(controlPlaneClient, targetClusterName, taint)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
disabledClusters = append(disabledClusters, targetClusterName)
temp--
}
}
})

ginkgo.By("check whether deployment of failed cluster is rescheduled to other available cluster", func() {
gomega.Eventually(func() int {
targetClusterNames = framework.ExtractTargetClustersFrom(controlPlaneClient, deployment)
for _, targetClusterName := range targetClusterNames {
// the target cluster should be overwritten to another available cluster
if !testhelper.IsExclude(targetClusterName, disabledClusters) {
return 0
}
}

return len(targetClusterNames)
}, pollTimeout, pollInterval).Should(gomega.Equal(minGroups))
})

ginkgo.By("recover not ready cluster", func() {
for _, disabledCluster := range disabledClusters {
fmt.Printf("cluster %s is waiting for recovering\n", disabledCluster)
err := recoverTaintedCluster(controlPlaneClient, disabledCluster, taint)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
}
})
})
})
})

// disableCluster will set wrong API endpoint of current cluster
Expand All @@ -167,6 +268,49 @@ func disableCluster(c client.Client, clusterName string) error {
return err
}

// taintCluster will taint cluster
func taintCluster(c client.Client, clusterName string, taint corev1.Taint) error {
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) {
clusterObj := &clusterv1alpha1.Cluster{}
if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
if apierrors.IsConflict(err) {
return false, nil
}
return false, err
}
clusterObj.Spec.Taints = append(clusterObj.Spec.Taints, taint)
if err := c.Update(context.TODO(), clusterObj); err != nil {
if apierrors.IsConflict(err) {
return false, nil
}
return false, err
}
return true, nil
})
return err
}

// recoverTaintedCluster will recover the taint of the disabled cluster
func recoverTaintedCluster(c client.Client, clusterName string, taint corev1.Taint) error {
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) {
clusterObj := &clusterv1alpha1.Cluster{}
if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
if apierrors.IsConflict(err) {
return false, nil
}
return false, err
}
if err := helper.UpdateClusterControllerTaint(context.TODO(), c, nil, []*corev1.Taint{&taint}, clusterObj); err != nil {
if apierrors.IsConflict(err) {
return false, nil
}
return false, err
}
return true, nil
})
return err
}

// recoverCluster will recover API endpoint of the disable cluster
func recoverCluster(c client.Client, clusterName string, originalAPIEndpoint string) error {
err := wait.PollImmediate(pollInterval, pollTimeout, func() (done bool, err error) {
Expand Down

0 comments on commit 6c33e76

Please sign in to comment.