diff --git a/glide.yaml b/glide.yaml
index 4841175f0a..b10043ee8c 100644
--- a/glide.yaml
+++ b/glide.yaml
@@ -10,8 +10,6 @@ import:
   version: kubernetes-1.17.0
 - package: k8s.io/apimachinery
   version: kubernetes-1.17.0
-- package: k8s.io/kubernetes
-  version: release-1.17
 - package: k8s.io/code-generator
   version: kubernetes-1.17.0
 - package: k8s.io/gengo
diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go
index ee55d855a7..a515556dc8 100644
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -28,6 +28,7 @@ import (
 	eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
+	"sigs.k8s.io/descheduler/pkg/utils"
 )

 func Run(rs *options.DeschedulerServer) error {
@@ -65,7 +66,7 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy *
 		return nil
 	}

-	nodePodCount := strategies.InitializeNodePodCount(nodes)
+	nodePodCount := utils.InitializeNodePodCount(nodes)
 	wait.Until(func() {
 		strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], evictionPolicyGroupVersion, nodes, nodePodCount)
 		strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes, nodePodCount)
diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go
index 82e5d977f3..37a96aa7ed 100644
--- a/pkg/descheduler/pod/pods.go
+++ b/pkg/descheduler/pod/pods.go
@@ -21,9 +21,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	clientset "k8s.io/client-go/kubernetes"
-	api "k8s.io/kubernetes/pkg/apis/core"
-	"k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
-	"k8s.io/kubernetes/pkg/kubelet/types"
+	"sigs.k8s.io/descheduler/pkg/utils"
 )

 const (
@@ -57,7 +55,7 @@ func ListEvictablePodsOnNode(client clientset.Interface, node *v1.Node, evictLoc
 }

 func ListPodsOnANode(client clientset.Interface, node *v1.Node) ([]*v1.Pod, error) {
-	fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(api.PodSucceeded) + ",status.phase!=" + string(api.PodFailed))
+	fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
 	if err != nil {
 		return []*v1.Pod{}, err
 	}
@@ -76,19 +74,19 @@ func ListPodsOnANode(client clientset.Interface, node *v1.Node) ([]*v1.Pod, erro
 }

 func IsCriticalPod(pod *v1.Pod) bool {
-	return types.IsCriticalPod(pod)
+	return utils.IsCriticalPod(pod)
 }

 func IsBestEffortPod(pod *v1.Pod) bool {
-	return qos.GetPodQOS(pod) == v1.PodQOSBestEffort
+	return utils.GetPodQOS(pod) == v1.PodQOSBestEffort
 }

 func IsBurstablePod(pod *v1.Pod) bool {
-	return qos.GetPodQOS(pod) == v1.PodQOSBurstable
+	return utils.GetPodQOS(pod) == v1.PodQOSBurstable
 }

 func IsGuaranteedPod(pod *v1.Pod) bool {
-	return qos.GetPodQOS(pod) == v1.PodQOSGuaranteed
+	return utils.GetPodQOS(pod) == v1.PodQOSGuaranteed
 }

 func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
@@ -102,8 +100,7 @@ func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {

 // IsMirrorPod checks whether the pod is a mirror pod.
 func IsMirrorPod(pod *v1.Pod) bool {
-	_, found := pod.ObjectMeta.Annotations[types.ConfigMirrorAnnotationKey]
-	return found
+	return utils.IsMirrorPod(pod)
 }

 // HaveEvictAnnotation checks if the pod have evict annotation
diff --git a/pkg/descheduler/pod/pods_test.go b/pkg/descheduler/pod/pods_test.go
index 1cd00c48e8..67c682d6cf 100644
--- a/pkg/descheduler/pod/pods_test.go
+++ b/pkg/descheduler/pod/pods_test.go
@@ -17,11 +17,11 @@ limitations under the License.
 package pod

 import (
+	"sigs.k8s.io/descheduler/pkg/utils"
 	"testing"

 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
-	"k8s.io/kubernetes/pkg/apis/scheduling"
 	"sigs.k8s.io/descheduler/test"
 )

@@ -209,7 +209,7 @@ func TestPodTypes(t *testing.T) {
 	// A Critical Pod.
 	p5.Namespace = "kube-system"
 	p5.Annotations = test.GetCriticalPodAnnotation()
-	systemCriticalPriority := scheduling.SystemCriticalPriority
+	systemCriticalPriority := utils.SystemCriticalPriority
 	p5.Spec.Priority = &systemCriticalPriority
 	if !IsMirrorPod(p4) {
 		t.Errorf("Expected p4 to be a mirror pod.")
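Note: with the pods.go and pods_test.go changes above, QoS and criticality checks now resolve against the new pkg/utils helpers instead of k8s.io/kubernetes. A minimal sketch of how those helpers behave, assuming the package compiles as introduced in this PR; the standalone main package, the bare pod literal, and the commented results are illustrative only:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	// A pod with no requests or limits classifies as BestEffort.
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{Name: "app"}},
		},
	}
	fmt.Println(utils.GetPodQOS(pod) == v1.PodQOSBestEffort) // true

	// Giving the pod a system-critical priority makes IsCriticalPod return true.
	criticalPriority := utils.SystemCriticalPriority
	pod.Spec.Priority = &criticalPriority
	fmt.Println(utils.IsCriticalPod(pod)) // true
}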
diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go
index ffe5d21efa..c638e65fee 100644
--- a/pkg/descheduler/strategies/duplicates.go
+++ b/pkg/descheduler/strategies/duplicates.go
@@ -27,6 +27,7 @@ import (
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/utils"
 )

 //type creator string
@@ -35,7 +36,7 @@ type DuplicatePodsMap map[string][]*v1.Pod
 // RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
 // A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
 // namespace. As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
-func RemoveDuplicatePods(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount) {
+func RemoveDuplicatePods(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodepodCount utils.NodePodEvictedCount) {
 	if !strategy.Enabled {
 		return
 	}
@@ -43,7 +44,7 @@ func RemoveDuplicatePods(ds *options.DeschedulerServer, strategy api.Descheduler
 }

 // deleteDuplicatePods evicts the pod from node and returns the count of evicted pods.
-func deleteDuplicatePods(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodepodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
+func deleteDuplicatePods(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodepodCount utils.NodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
 	podsEvicted := 0
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v", node.Name)
diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go
index f7d0abdf2b..b149111b69 100644
--- a/pkg/descheduler/strategies/lownodeutilization.go
+++ b/pkg/descheduler/strategies/lownodeutilization.go
@@ -23,13 +23,12 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/klog"
-	helper "k8s.io/kubernetes/pkg/api/v1/resource"
-
 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/utils"
 )

 type NodeUsageMap struct {
@@ -44,7 +43,7 @@ type NodeUsageMap struct {

 type NodePodsMap map[*v1.Node][]*v1.Pod

-func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount) {
+func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount utils.NodePodEvictedCount) {
 	if !strategy.Enabled {
 		return
 	}
@@ -155,7 +154,7 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
 // evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
 // evicts them based on QoS as fallback option.
 // TODO: @ravig Break this function into smaller functions.
-func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount nodePodEvictedCount) int {
+func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount utils.NodePodEvictedCount) int {
 	podsEvicted := 0

 	SortNodesByUsage(targetNodes)
@@ -240,8 +239,8 @@ func evictPods(inputPods []*v1.Pod,
 		if maxPodsToEvict > 0 && *podsEvicted+1 > maxPodsToEvict {
 			break
 		}
-		cUsage := helper.GetResourceRequest(pod, v1.ResourceCPU)
-		mUsage := helper.GetResourceRequest(pod, v1.ResourceMemory)
+		cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
+		mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
 		success, err := evictions.EvictPod(client, pod, evictionPolicyGroupVersion, dryRun)
 		if !success {
 			klog.Warningf("Error when evicting pod: %#v (%#v)", pod.Name, err)
@@ -373,7 +372,7 @@ func NodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool)
 			gPods = append(gPods, pod)
 		}

-		req, _ := helper.PodRequestsAndLimits(pod)
+		req, _ := utils.PodRequestsAndLimits(pod)
 		for name, quantity := range req {
 			if name == v1.ResourceCPU || name == v1.ResourceMemory {
 				if value, ok := totalReqs[name]; !ok {
diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go
index 5f6b9f89b1..96c775eddb 100644
--- a/pkg/descheduler/strategies/node_affinity.go
+++ b/pkg/descheduler/strategies/node_affinity.go
@@ -19,6 +19,7 @@ package strategies
 import (
 	"k8s.io/api/core/v1"
 	"k8s.io/klog"
+	"sigs.k8s.io/descheduler/pkg/utils"

 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/pkg/api"
@@ -27,11 +28,11 @@ import (
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
 )

-func RemovePodsViolatingNodeAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodePodCount nodePodEvictedCount) {
+func RemovePodsViolatingNodeAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodePodCount utils.NodePodEvictedCount) {
 	removePodsViolatingNodeAffinityCount(ds, strategy, evictionPolicyGroupVersion, nodes, nodePodCount, ds.MaxNoOfPodsToEvictPerNode, ds.EvictLocalStoragePods)
 }

-func removePodsViolatingNodeAffinityCount(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
+func removePodsViolatingNodeAffinityCount(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount utils.NodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
 	evictedPodCount := 0
 	if !strategy.Enabled {
 		return evictedPodCount
diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go
index d2903e8176..f85a8403dd 100644
--- a/pkg/descheduler/strategies/node_taint.go
+++ b/pkg/descheduler/strategies/node_taint.go
@@ -21,6 +21,7 @@ import (
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/utils"

 	"k8s.io/api/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes" @@ -33,7 +34,7 @@ const ( ) // RemovePodsViolatingNodeTaints with elimination strategy -func RemovePodsViolatingNodeTaints(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount nodePodEvictedCount) { +func RemovePodsViolatingNodeTaints(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount utils.NodePodEvictedCount) { if !strategy.Enabled { return } @@ -41,7 +42,7 @@ func RemovePodsViolatingNodeTaints(ds *options.DeschedulerServer, strategy api.D } // deletePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes -func deletePodsViolatingNodeTaints(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int { +func deletePodsViolatingNodeTaints(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount utils.NodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int { podsEvicted := 0 for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index 3ffb638c94..63ff7e70c7 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -21,16 +21,16 @@ import ( "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/utils" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" - priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util" ) // RemovePodsViolatingInterPodAntiAffinity with elimination strategy -func RemovePodsViolatingInterPodAntiAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount nodePodEvictedCount) { +func RemovePodsViolatingInterPodAntiAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount utils.NodePodEvictedCount) { if !strategy.Enabled { return } @@ -38,7 +38,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ds *options.DeschedulerServer, stra } // removePodsWithAffinityRules evicts pods on the node which are having a pod affinity rules. 
-func removePodsWithAffinityRules(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
+func removePodsWithAffinityRules(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount utils.NodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
 	podsEvicted := 0
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v\n", node.Name)
@@ -77,14 +77,14 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
 	affinity := pod.Spec.Affinity
 	if affinity != nil && affinity.PodAntiAffinity != nil {
 		for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
-			namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
+			namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
 			selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
 			if err != nil {
 				klog.Infof("%v", err)
 				return false
 			}
 			for _, existingPod := range pods {
-				if existingPod.Name != pod.Name && priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
+				if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
 					return true
 				}
 			}
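The anti-affinity strategy above now resolves term namespaces and selector matching through pkg/utils instead of the scheduler's priorityutil. A hedged sketch of evaluating a single PodAffinityTerm with those helpers; the pod and term literals, and the commented result, are made up for illustration:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	// Pod whose anti-affinity term we are evaluating; it names no namespaces,
	// so the term falls back to the pod's own namespace ("default").
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "default"}}
	term := v1.PodAffinityTerm{
		LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "web"}},
		TopologyKey:   "kubernetes.io/hostname",
	}

	namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil {
		panic(err)
	}

	// An existing pod in the same namespace with a matching label trips the term.
	existing := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name:      "web-2",
		Namespace: "default",
		Labels:    map[string]string{"app": "web"},
	}}
	fmt.Println(utils.PodMatchesTermsNamespaceAndSelector(existing, namespaces, selector)) // true
}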
diff --git a/pkg/descheduler/strategies/util.go b/pkg/descheduler/strategies/util.go
deleted file mode 100644
index f1dbe31f1b..0000000000
--- a/pkg/descheduler/strategies/util.go
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
-Copyright 2017 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package strategies
-
-import (
-	"k8s.io/api/core/v1"
-)
-
-// This file contains the datastructures, types & functions needed by all the strategies so that we don't have
-// to compute them again in each strategy.
-
-// nodePodEvictedCount keeps count of pods evicted on node. This is used in conjunction with strategies to
-type nodePodEvictedCount map[*v1.Node]int
-
-// InitializeNodePodCount initializes the nodePodCount.
-func InitializeNodePodCount(nodeList []*v1.Node) nodePodEvictedCount {
-	var nodePodCount = make(nodePodEvictedCount)
-	for _, node := range nodeList {
-		// Initialize podsEvicted till now with 0.
-		nodePodCount[node] = 0
-	}
-	return nodePodCount
-}
diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go
new file mode 100644
index 0000000000..4eafdcab2e
--- /dev/null
+++ b/pkg/utils/pod.go
@@ -0,0 +1,181 @@
+package utils
+
+import (
+	"fmt"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	"k8s.io/component-base/featuregate"
+)
+
+const (
+	// owner: @jinxu
+	// beta: v1.10
+	//
+	// New local storage types to support local storage capacity isolation
+	LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
+
+	// owner: @egernst
+	// alpha: v1.16
+	//
+	// Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass
+	PodOverhead featuregate.Feature = "PodOverhead"
+)
+
+// GetResourceRequest finds and returns the request value for a specific resource.
+func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
+	if resource == v1.ResourcePods {
+		return 1
+	}
+
+	requestQuantity := GetResourceRequestQuantity(pod, resource)
+
+	if resource == v1.ResourceCPU {
+		return requestQuantity.MilliValue()
+	}
+
+	return requestQuantity.Value()
+}
+
+// GetResourceRequestQuantity finds and returns the request quantity for a specific resource.
+func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
+	requestQuantity := resource.Quantity{}
+
+	switch resourceName {
+	case v1.ResourceCPU:
+		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
+	case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
+		requestQuantity = resource.Quantity{Format: resource.BinarySI}
+	default:
+		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
+	}
+
+	if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(LocalStorageCapacityIsolation) {
+		// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
+		return requestQuantity
+	}
+
+	for _, container := range pod.Spec.Containers {
+		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
+			requestQuantity.Add(rQuantity)
+		}
+	}
+
+	for _, container := range pod.Spec.InitContainers {
+		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
+			if requestQuantity.Cmp(rQuantity) < 0 {
+				requestQuantity = rQuantity.DeepCopy()
+			}
+		}
+	}
+
+	// if PodOverhead feature is supported, add overhead for running a pod
+	// to the total requests if the resource total is non-zero
+	if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
+		if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
+			requestQuantity.Add(podOverhead)
+		}
+	}
+
+	return requestQuantity
+}
+
+// IsMirrorPod returns true if the passed Pod is a Mirror Pod.
+func IsMirrorPod(pod *v1.Pod) bool {
+	_, ok := pod.Annotations[v1.MirrorPodAnnotationKey]
+	return ok
+}
+
+// IsStaticPod returns true if the pod is a static pod.
+func IsStaticPod(pod *v1.Pod) bool {
+	source, err := GetPodSource(pod)
+	return err == nil && source != "api"
+}
+
+// GetPodSource returns the source of the pod based on the annotation.
+func GetPodSource(pod *v1.Pod) (string, error) {
+	if pod.Annotations != nil {
+		if source, ok := pod.Annotations["kubernetes.io/config.source"]; ok {
+			return source, nil
+		}
+	}
+	return "", fmt.Errorf("cannot get source of pod %q", pod.UID)
+}
+
+// IsCriticalPod returns true if pod's priority is greater than or equal to SystemCriticalPriority.
+func IsCriticalPod(pod *v1.Pod) bool {
+	if IsStaticPod(pod) {
+		return true
+	}
+	if IsMirrorPod(pod) {
+		return true
+	}
+	if pod.Spec.Priority != nil && IsCriticalPodBasedOnPriority(*pod.Spec.Priority) {
+		return true
+	}
+	return false
+}
+
+// IsCriticalPodBasedOnPriority checks if the given pod is a critical pod based on priority resolved from pod Spec.
+func IsCriticalPodBasedOnPriority(priority int32) bool {
+	return priority >= SystemCriticalPriority
+}
+
+// PodRequestsAndLimits returns a dictionary of all defined resources summed up for all
+// containers of the pod. If PodOverhead feature is enabled, pod overhead is added to the
+// total container resource requests and to the total container limits which have a
+// non-zero quantity.
+func PodRequestsAndLimits(pod *v1.Pod) (reqs, limits v1.ResourceList) {
+	reqs, limits = v1.ResourceList{}, v1.ResourceList{}
+	for _, container := range pod.Spec.Containers {
+		addResourceList(reqs, container.Resources.Requests)
+		addResourceList(limits, container.Resources.Limits)
+	}
+	// init containers define the minimum of any resource
+	for _, container := range pod.Spec.InitContainers {
+		maxResourceList(reqs, container.Resources.Requests)
+		maxResourceList(limits, container.Resources.Limits)
+	}
+
+	// if PodOverhead feature is supported, add overhead for running a pod
+	// to the sum of requests and to non-zero limits:
+	if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
+		addResourceList(reqs, pod.Spec.Overhead)
+
+		for name, quantity := range pod.Spec.Overhead {
+			if value, ok := limits[name]; ok && !value.IsZero() {
+				value.Add(quantity)
+				limits[name] = value
+			}
+		}
+	}
+
+	return
+}
+
+// addResourceList adds the resources in newList to list
+func addResourceList(list, newList v1.ResourceList) {
+	for name, quantity := range newList {
+		if value, ok := list[name]; !ok {
+			list[name] = quantity.DeepCopy()
+		} else {
+			value.Add(quantity)
+			list[name] = value
+		}
+	}
+}
+
+// maxResourceList sets list to the greater of list/newList for every resource in either list.
+func maxResourceList(list, new v1.ResourceList) {
+	for name, quantity := range new {
+		if value, ok := list[name]; !ok {
+			list[name] = quantity.DeepCopy()
+			continue
+		} else {
+			if quantity.Cmp(value) > 0 {
+				list[name] = quantity.DeepCopy()
+			}
+		}
+	}
+}
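pkg/utils/pod.go above carries local copies of the request-accounting helpers that previously came from k8s.io/kubernetes. A small sketch of what they report for a pod with an init container, assuming default feature-gate settings; the pod values and the expected output in the comments are illustrative only:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			// The init container's request acts as a floor, not an addend.
			InitContainers: []v1.Container{{
				Name: "init",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
				},
			}},
			Containers: []v1.Container{{
				Name: "app",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("100m"),
						v1.ResourceMemory: resource.MustParse("128Mi"),
					},
				},
			}},
		},
	}

	// CPU requests are reported in millicores: max(300m init, 100m app) = 300.
	fmt.Println(utils.GetResourceRequest(pod, v1.ResourceCPU))

	reqs, limits := utils.PodRequestsAndLimits(pod)
	cpu := reqs[v1.ResourceCPU]
	mem := reqs[v1.ResourceMemory]
	fmt.Println(cpu.String(), mem.String(), len(limits)) // 300m 128Mi 0
}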
diff --git a/pkg/utils/predicates.go b/pkg/utils/predicates.go
index a2ef1fb3a8..c8a7ad61bf 100644
--- a/pkg/utils/predicates.go
+++ b/pkg/utils/predicates.go
@@ -21,8 +21,8 @@ import (

 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
 	"k8s.io/klog"
-	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 )

 // The following code has been copied from predicates package to avoid the
@@ -81,7 +81,7 @@ func podMatchesNodeLabels(pod *v1.Pod, node *v1.Node) bool {
 // terms are ORed, and an empty list of terms will match nothing.
 func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
 	for _, req := range nodeSelectorTerms {
-		nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(req.MatchExpressions)
+		nodeSelector, err := NodeSelectorRequirementsAsSelector(req.MatchExpressions)
 		if err != nil {
 			klog.V(10).Infof("Failed to parse MatchExpressions: %+v, regarding as not match.", req.MatchExpressions)
 			return false
@@ -92,3 +92,37 @@ func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSele
 	}
 	return false
 }
+
+// NodeSelectorRequirementsAsSelector converts the []NodeSelectorRequirement api type into a struct that implements
+// labels.Selector.
+func NodeSelectorRequirementsAsSelector(nsm []v1.NodeSelectorRequirement) (labels.Selector, error) {
+	if len(nsm) == 0 {
+		return labels.Nothing(), nil
+	}
+	selector := labels.NewSelector()
+	for _, expr := range nsm {
+		var op selection.Operator
+		switch expr.Operator {
+		case v1.NodeSelectorOpIn:
+			op = selection.In
+		case v1.NodeSelectorOpNotIn:
+			op = selection.NotIn
+		case v1.NodeSelectorOpExists:
+			op = selection.Exists
+		case v1.NodeSelectorOpDoesNotExist:
+			op = selection.DoesNotExist
+		case v1.NodeSelectorOpGt:
+			op = selection.GreaterThan
+		case v1.NodeSelectorOpLt:
+			op = selection.LessThan
+		default:
+			return nil, fmt.Errorf("%q is not a valid node selector operator", expr.Operator)
+		}
+		r, err := labels.NewRequirement(expr.Key, op, expr.Values)
+		if err != nil {
+			return nil, err
+		}
+		selector = selector.Add(*r)
+	}
+	return selector, nil
+}
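predicates.go now ships its own NodeSelectorRequirementsAsSelector rather than importing v1helper. A sketch of converting node selector requirements into a labels.Selector and matching them against node labels; the requirement keys and label values below are illustrative:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	// A node selector term requiring a Linux node on amd64 or arm64.
	reqs := []v1.NodeSelectorRequirement{
		{Key: "kubernetes.io/os", Operator: v1.NodeSelectorOpIn, Values: []string{"linux"}},
		{Key: "kubernetes.io/arch", Operator: v1.NodeSelectorOpIn, Values: []string{"amd64", "arm64"}},
	}

	selector, err := utils.NodeSelectorRequirementsAsSelector(reqs)
	if err != nil {
		panic(err)
	}

	nodeLabels := labels.Set{
		"kubernetes.io/os":   "linux",
		"kubernetes.io/arch": "arm64",
	}
	fmt.Println(selector.Matches(nodeLabels)) // true
}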
diff --git a/pkg/utils/priority.go b/pkg/utils/priority.go
new file mode 100644
index 0000000000..a5a5737cf7
--- /dev/null
+++ b/pkg/utils/priority.go
@@ -0,0 +1,35 @@
+package utils
+
+import (
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/sets"
+)
+
+const SystemCriticalPriority = 2 * int32(1000000000)
+
+// GetNamespacesFromPodAffinityTerm returns a set of names
+// according to the namespaces indicated in podAffinityTerm.
+// If namespaces is empty it considers the given pod's namespace.
+func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String {
+	names := sets.String{}
+	if len(podAffinityTerm.Namespaces) == 0 {
+		names.Insert(pod.Namespace)
+	} else {
+		names.Insert(podAffinityTerm.Namespaces...)
+	}
+	return names
+}
+
+// PodMatchesTermsNamespaceAndSelector returns true if the given pod
+// matches the namespace set and the label selector passed in.
+func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool {
+	if !namespaces.Has(pod.Namespace) {
+		return false
+	}
+
+	if !selector.Matches(labels.Set(pod.Labels)) {
+		return false
+	}
+	return true
+}
diff --git a/pkg/utils/qos.go b/pkg/utils/qos.go
new file mode 100644
index 0000000000..56938b528b
--- /dev/null
+++ b/pkg/utils/qos.go
@@ -0,0 +1,85 @@
+package utils
+
+import (
+	"k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/util/sets"
+)
+
+var supportedQoSComputeResources = sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory))
+
+// QOSList is a set of (resource name, QoS class) pairs.
+type QOSList map[v1.ResourceName]v1.PodQOSClass
+
+func isSupportedQoSComputeResource(name v1.ResourceName) bool {
+	return supportedQoSComputeResources.Has(string(name))
+}
+
+// GetPodQOS returns the QoS class of a pod.
+// A pod is besteffort if none of its containers have specified any requests or limits.
+// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
+// A pod is burstable if limits and requests do not match across all containers.
+func GetPodQOS(pod *v1.Pod) v1.PodQOSClass {
+	requests := v1.ResourceList{}
+	limits := v1.ResourceList{}
+	zeroQuantity := resource.MustParse("0")
+	isGuaranteed := true
+	allContainers := []v1.Container{}
+	allContainers = append(allContainers, pod.Spec.Containers...)
+	allContainers = append(allContainers, pod.Spec.InitContainers...)
+	for _, container := range allContainers {
+		// process requests
+		for name, quantity := range container.Resources.Requests {
+			if !isSupportedQoSComputeResource(name) {
+				continue
+			}
+			if quantity.Cmp(zeroQuantity) == 1 {
+				delta := quantity.DeepCopy()
+				if _, exists := requests[name]; !exists {
+					requests[name] = delta
+				} else {
+					delta.Add(requests[name])
+					requests[name] = delta
+				}
+			}
+		}
+		// process limits
+		qosLimitsFound := sets.NewString()
+		for name, quantity := range container.Resources.Limits {
+			if !isSupportedQoSComputeResource(name) {
+				continue
+			}
+			if quantity.Cmp(zeroQuantity) == 1 {
+				qosLimitsFound.Insert(string(name))
+				delta := quantity.DeepCopy()
+				if _, exists := limits[name]; !exists {
+					limits[name] = delta
+				} else {
+					delta.Add(limits[name])
+					limits[name] = delta
+				}
+			}
+		}
+
+		if !qosLimitsFound.HasAll(string(v1.ResourceMemory), string(v1.ResourceCPU)) {
+			isGuaranteed = false
+		}
+	}
+	if len(requests) == 0 && len(limits) == 0 {
+		return v1.PodQOSBestEffort
+	}
+	// Check if requests match limits for all resources.
+	if isGuaranteed {
+		for name, req := range requests {
+			if lim, exists := limits[name]; !exists || lim.Cmp(req) != 0 {
+				isGuaranteed = false
+				break
+			}
+		}
+	}
+	if isGuaranteed &&
+		len(requests) == len(limits) {
+		return v1.PodQOSGuaranteed
+	}
+	return v1.PodQOSBurstable
+}
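qos.go above reimplements GetPodQOS locally. A sketch of the classification it produces for matching versus partial limits, assuming the package builds as added in this PR; the resource values and printed classes are illustrative:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	guaranteed := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("256Mi"),
	}

	// Requests equal to limits on every container yields Guaranteed.
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "app",
				Resources: v1.ResourceRequirements{
					Requests: guaranteed,
					Limits:   guaranteed,
				},
			}},
		},
	}
	fmt.Println(utils.GetPodQOS(pod)) // Guaranteed

	// Dropping the CPU limit leaves requests != limits, so the pod becomes Burstable.
	pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
		v1.ResourceMemory: resource.MustParse("256Mi"),
	}
	fmt.Println(utils.GetPodQOS(pod)) // Burstable
}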
diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go
index 985bfc8598..29ecce60c8 100644
--- a/pkg/utils/utils.go
+++ b/pkg/utils/utils.go
@@ -15,3 +15,21 @@ limitations under the License.
 */

 package utils
+
+import v1 "k8s.io/api/core/v1"
+
+// This file contains the datastructures, types & functions needed by all the strategies so that we don't have
+// to compute them again in each strategy.
+
+// NodePodEvictedCount keeps count of pods evicted on node. This is used in conjunction with strategies to limit the number of evictions per node.
+type NodePodEvictedCount map[*v1.Node]int
+
+// InitializeNodePodCount initializes the nodePodCount.
+func InitializeNodePodCount(nodeList []*v1.Node) NodePodEvictedCount {
+	var nodePodCount = make(NodePodEvictedCount)
+	for _, node := range nodeList {
+		// Initialize podsEvicted till now with 0.
+		nodePodCount[node] = 0
+	}
+	return nodePodCount
+}
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 2bf8106f0a..5edf7ca417 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -26,7 +26,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/klog"
-	"k8s.io/kubernetes/pkg/api/testapi"
 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/pkg/api"
 	deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
@@ -36,6 +35,7 @@ import (
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
 	"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
+	"sigs.k8s.io/descheduler/pkg/utils"
 )

 func MakePodSpec() v1.PodSpec {
@@ -72,7 +72,7 @@ func RcByNameContainer(name string, replicas int32, labels map[string]string, gr
 	return &v1.ReplicationController{
 		TypeMeta: metav1.TypeMeta{
 			Kind:       "ReplicationController",
-			APIVersion: testapi.Groups[v1.GroupName].GroupVersion().String(),
+			APIVersion: "v1",
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: name,
@@ -116,7 +116,7 @@ func startEndToEndForLowNodeUtilization(clientset clientset.Interface) {
 	nodeUtilizationStrategyParams := deschedulerapi.StrategyParameters{NodeResourceUtilizationThresholds: nodeUtilizationThresholds}
 	lowNodeUtilizationStrategy := deschedulerapi.DeschedulerStrategy{Enabled: true, Params: nodeUtilizationStrategyParams}
 	ds := &options.DeschedulerServer{Client: clientset}
-	nodePodCount := strategies.InitializeNodePodCount(nodes)
+	nodePodCount := utils.InitializeNodePodCount(nodes)
 	strategies.LowNodeUtilization(ds, lowNodeUtilizationStrategy, evictionPolicyGroupVersion, nodes, nodePodCount)
 	time.Sleep(10 * time.Second)
 }
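With util.go moved into pkg/utils, callers initialize the shared per-node eviction counter via utils.InitializeNodePodCount, as descheduler.go and the e2e test above now do. A hypothetical helper sketching the bookkeeping pattern the strategies follow around utils.NodePodEvictedCount; trackEvictions and its cap handling are illustrative, not part of this PR:

package main

import (
	v1 "k8s.io/api/core/v1"

	"sigs.k8s.io/descheduler/pkg/utils"
)

// trackEvictions shows the intended use of utils.NodePodEvictedCount: one shared map,
// checked against a per-node cap before evicting and bumped after each eviction.
func trackEvictions(nodes []*v1.Node, maxPodsToEvictPerNode int) int {
	nodePodCount := utils.InitializeNodePodCount(nodes) // every node starts at 0

	evicted := 0
	for _, node := range nodes {
		// A real strategy would list evictable pods here and call evictions.EvictPod;
		// this sketch only demonstrates the counter bookkeeping.
		if maxPodsToEvictPerNode > 0 && nodePodCount[node]+1 > maxPodsToEvictPerNode {
			continue
		}
		nodePodCount[node]++
		evicted++
	}
	return evicted
}

func main() {
	nodes := []*v1.Node{{}, {}}
	_ = trackEvictions(nodes, 1)
}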