diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index ad1b676ab6..322d84894e 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -284,7 +284,7 @@ func balanceDomains( // also (just for tracking), add them to the list of pods in the lower topology aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:] for k := range aboveToEvict { - // if the pod has a hard nodeAffinity or nodeSelector that only matches this node, + // if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node, // don't bother evicting it as it will just end up back on the same node // however we still account for it "being evicted" so the algorithm can complete // TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod @@ -298,6 +298,12 @@ func balanceDomains( klog.V(2).InfoS("Ignoring pod for eviction due to node selector/affinity", "pod", klog.KObj(aboveToEvict[k])) continue } + + if nodesPodToleratedOnBesidesCurrent(aboveToEvict[k], nodeMap) == 0 { + klog.V(2).InfoS("Ignoring pod for eviction due to node taints not being tolerated", "pod", klog.KObj(aboveToEvict[k])) + continue + } + podsForEviction[aboveToEvict[k]] = struct{}{} } sortedDomains[j].pods = sortedDomains[j].pods[:len(sortedDomains[j].pods)-movePods] @@ -312,7 +318,21 @@ func balanceDomains( func nodesPodFitsOnBesidesCurrent(pod *v1.Pod, nodeMap map[string]*v1.Node) int { count := 0 for _, node := range nodeMap { - if nodeutil.PodFitsCurrentNode(pod, node) && node != nodeMap[pod.Spec.NodeName] { + if node != nodeMap[pod.Spec.NodeName] && nodeutil.PodFitsCurrentNode(pod, node) { + count++ + } + } + return count +} + +// nodesPodToleratedOnBesidesCurrent counts the number of nodes this pod could fit on based on its toleration and the node's taints. +func nodesPodToleratedOnBesidesCurrent(pod *v1.Pod, nodeMap map[string]*v1.Node) int { + count := 0 + hardTaintsFilter := func(taint *v1.Taint) bool { + return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute + } + for _, node := range nodeMap { + if node != nodeMap[pod.Spec.NodeName] && utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, hardTaintsFilter) { count++ } } diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index c9c4354c12..bebfc2fca7 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -435,6 +435,134 @@ func TestTopologySpreadConstraint(t *testing.T) { strategy: api.DeschedulerStrategy{}, namespaces: []string{"ns1"}, }, + { + name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod since pod tolerates the node with taint", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneB" + n.Spec.Taints = []v1.Taint{ + { + Key: "taint-test", + Value: "test", + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + tolerations: []v1.Toleration{ + { + Key: "taint-test", + Value: "test", + Operator: v1.TolerationOpEqual, + Effect: v1.TaintEffectNoSchedule, + }, + }, + }, + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + nodeSelector: map[string]string{"zone": "zoneA"}, + }, + }), + expectedEvictedCount: 1, + strategy: api.DeschedulerStrategy{}, + namespaces: []string{"ns1"}, + }, + { + name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneB" + n.Spec.Taints = []v1.Taint{ + { + Key: "taint-test", + Value: "test", + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + }, + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + nodeSelector: map[string]string{"zone": "zoneA"}, + }, + }), + expectedEvictedCount: 0, + strategy: api.DeschedulerStrategy{}, + namespaces: []string{"ns1"}, + }, + { + name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneB" + n.Spec.Taints = []v1.Taint{ + { + Key: "taint-test", + Value: "test", + Effect: v1.TaintEffectPreferNoSchedule, + }, + } + }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + }, + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + nodeSelector: map[string]string{"zone": "zoneA"}, + }, + }), + expectedEvictedCount: 1, + strategy: api.DeschedulerStrategy{}, + namespaces: []string{"ns1"}, + }, } for _, tc := range testCases { @@ -478,6 +606,7 @@ type testPodList struct { nodeSelector map[string]string nodeAffinity *v1.Affinity noOwners bool + tolerations []v1.Toleration } func createTestPods(testPods []testPodList) []*v1.Pod { @@ -496,6 +625,7 @@ func createTestPods(testPods []testPodList) []*v1.Pod { } p.Spec.TopologySpreadConstraints = tp.constraints p.Spec.NodeSelector = tp.nodeSelector + p.Spec.Tolerations = tp.tolerations p.Spec.Affinity = tp.nodeAffinity })) podNum++