Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pod anti-affinity check among nodes #1033

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,36 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Name() string {
}

func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
podsList, err := d.handle.ClientSet().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
if err != nil {
return &frameworktypes.Status{
Err: fmt.Errorf("error listing all pods: %v", err),
}
}

podsInANamespace := groupByNamespace(podsList)

runningPodFilter := func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
}
Comment on lines +90 to +92
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I'm afraid that this code duplicating with following:

f := func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
}

podsOnANode := groupByNodeName(podsList, podutil.WrapFilterFuncs(runningPodFilter, d.podFilter))

nodeMap := createNodeMap(nodes)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could nodeMap replace the need to call ListPodsOnANode() in the loop below?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or, maybe not nodeMap sorry... but looking at how we call allPods above and then call ListPodsOnANode, maybe we can cut out one of them

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your review and sorry for my late reply.

I removed ListPodsOnANode call and keep code to query for all pods. Because listing pods by each node looks not efficient.
But, I'm not sure that my code filtering pods for podsOnANode does same as ListPodsOnANode call.


loop:
for _, node := range nodes {
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter)
if err != nil {
return &frameworktypes.Status{
Err: fmt.Errorf("error listing pods: %v", err),
}
}
// sort the evictable Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
pods := podsOnANode[node.Name]
// sort the evict-able Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
totalPods := len(pods)
for i := 0; i < totalPods; i++ {
if checkPodsWithAntiAffinityExist(pods[i], pods) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{}) {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update pods.
// Update allPods.
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
Expand All @@ -109,8 +121,54 @@ loop:
return nil
}

func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod) map[string][]*v1.Pod {
podList, ok := podMap[podToRemove.Namespace]
if !ok {
return podMap
}
for i := 0; i < len(podList); i++ {
podToCheck := podList[i]
if podToRemove.Name == podToCheck.Name {
podMap[podToRemove.Namespace] = append(podList[:i], podList[i+1:]...)
return podMap
}
}
return podMap
}

func groupByNamespace(pods *v1.PodList) map[string][]*v1.Pod {
m := make(map[string][]*v1.Pod)
for i := 0; i < len(pods.Items); i++ {
pod := &(pods.Items[i])
m[pod.Namespace] = append(m[pod.Namespace], pod)
}
return m
}

func groupByNodeName(pods *v1.PodList, filter podutil.FilterFunc) map[string][]*v1.Pod {
m := make(map[string][]*v1.Pod)
if filter == nil {
filter = func(p *v1.Pod) bool { return true }
}
for i := 0; i < len(pods.Items); i++ {
pod := &(pods.Items[i])
if filter(pod) {
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
}
}
return m
}

func createNodeMap(nodes []*v1.Node) map[string]*v1.Node {
m := make(map[string]*v1.Node, len(nodes))
for _, node := range nodes {
m[node.GetName()] = node
}
return m
}

// checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
Expand All @@ -120,16 +178,52 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for _, existingPod := range pods {
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
return true
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod))
return true
}
}
}
}
}
}
return false
}

func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
if node1.Name == node2.Name {
return true
}
node1Labels := node1.Labels
if node1Labels == nil {
return false
}
node2Labels := node2.Labels
if node2Labels == nil {
return false
}
value1, ok := node1Labels[key]
if !ok {
return false
}
value2, ok := node2Labels[key]
if !ok {
return false
}
return value1 == value2
}

// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,27 @@ import (
)

func TestPodAntiAffinity(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"region": "main-region",
}
})
node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})

node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
node5 := test.BuildTestNode("n5", 200, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"region": "main-region",
}
})

p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
Expand All @@ -62,6 +70,7 @@ func TestPodAntiAffinity(t *testing.T) {
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
p11 := test.BuildTestPod("p11", 100, 0, node5.Name, nil)
p9.DeletionTimestamp = &metav1.Time{}
p10.DeletionTimestamp = &metav1.Time{}

Expand All @@ -73,6 +82,7 @@ func TestPodAntiAffinity(t *testing.T) {
p5.Labels = map[string]string{"foo": "bar"}
p6.Labels = map[string]string{"foo": "bar"}
p7.Labels = map[string]string{"foo1": "bar1"}
p11.Labels = map[string]string{"foo": "bar"}
nonEvictablePod.Labels = map[string]string{"foo": "bar"}
test.SetNormalOwnerRef(p1)
test.SetNormalOwnerRef(p2)
Expand All @@ -83,6 +93,7 @@ func TestPodAntiAffinity(t *testing.T) {
test.SetNormalOwnerRef(p7)
test.SetNormalOwnerRef(p9)
test.SetNormalOwnerRef(p10)
test.SetNormalOwnerRef(p11)

// set pod anti affinity
setPodAntiAffinity(p1, "foo", "bar")
Expand Down Expand Up @@ -186,6 +197,13 @@ func TestPodAntiAffinity(t *testing.T) {
expectedEvictedPodCount: 0,
nodeFit: true,
},
{
description: "Evict pod violating anti-affinity among different node (all pods have anti-affinity)",
pods: []*v1.Pod{p1, p11},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 1,
nodeFit: false,
},
}

for _, test := range tests {
Expand Down