diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 7515b87..9fe1a8f 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -29,7 +29,6 @@ import ( "github.com/patrickmn/go-cache" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -179,11 +178,6 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur return nil, fmt.Errorf("failed to get cluster CNI: %w", err) } - nodeResourceOverhead, err := p.nodeOverhead(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get node resource overhead: %w", err) - } - result := lo.Map(p.instanceTypesInfo, func(i *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType, _ int) *cloudprovider.InstanceType { zoneData := lo.Map(allZones.UnsortedList(), func(zoneID string, _ int) ZoneData { if !p.instanceTypesOfferings[lo.FromPtr(i.InstanceTypeId)].Has(zoneID) || !vSwitchsZones.Has(zoneID) { @@ -203,7 +197,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur // so that Karpenter is able to cache the set of InstanceTypes based on values that alter the set of instance types // !!! Important !!! 
offers := p.createOfferings(ctx, *i.InstanceTypeId, zoneData) - return NewInstanceType(ctx, nodeResourceOverhead, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI) + return NewInstanceType(ctx, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI) }) // Filter out nil values @@ -213,37 +207,6 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur return result, nil } -func (p *DefaultProvider) nodeOverhead(ctx context.Context) (corev1.ResourceList, error) { - var nodes corev1.NodeList - if err := p.kubeClient.List(ctx, &nodes); err != nil { - return corev1.ResourceList{}, err - } - - // We do not sure how to calculate the overhead of the node, let's just use the maximum possible - // To avoid some loop node creation - maxCPUOverHead := int64(0) - maxMemoryOverHead := int64(0) - for _, node := range nodes.Items { - capacity := node.Status.Capacity - allocatable := node.Status.Allocatable - - cpuOverHead := capacity.Cpu().MilliValue() - allocatable.Cpu().MilliValue() - memoryOverHead := capacity.Memory().Value() - allocatable.Memory().Value() - - if cpuOverHead > maxCPUOverHead { - maxCPUOverHead = cpuOverHead - } - if memoryOverHead > maxMemoryOverHead { - maxMemoryOverHead = memoryOverHead - } - } - - return corev1.ResourceList{ - corev1.ResourceCPU: *resource.NewMilliQuantity(maxCPUOverHead, resource.DecimalSI), - corev1.ResourceMemory: *resource.NewQuantity(maxMemoryOverHead, resource.DecimalSI), - }, nil -} - func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // DO NOT REMOVE THIS LOCK ---------------------------------------------------------------------------- // We lock here so that multiple callers to getInstanceTypesOfferings do not result in cache misses and multiple diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index c8fbd29..f516a6d 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -57,7 +57,52 
@@ type ZoneData struct { Available bool } -func NewInstanceType(ctx context.Context, overhead corev1.ResourceList, +func calculateResourceOverhead(pods, cpuM, memoryMi int64) corev1.ResourceList { + // referring to: https://help.aliyun.com/zh/ack/ack-managed-and-ack-dedicated/user-guide/resource-reservation-policy#0f5ffe176df7q + // CPU overhead calculation + cpuOverHead := calculateCPUOverhead(cpuM) + + // TODO: In a real environment, the formula does not produce accurate results, + // consistently yielding values that are 200MiB smaller than the observed overhead. + // Memory overhead: min(11*pods + 255, memoryMi*0.25) + memoryOverHead := int64(math.Min(float64(11*pods+255), float64(memoryMi)*0.25)) + 200 + + return corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(cpuOverHead, resource.DecimalSI), + corev1.ResourceMemory: *resources.Quantity(fmt.Sprintf("%dMi", memoryOverHead)), + } +} + +// thresholds defines CPU overhead thresholds and their corresponding percentages +var thresholds = [...]struct { + cores int64 + overhead float64 +}{ + {1000, 0.06}, + {2000, 0.01}, + {3000, 0.005}, + {4000, 0.005}, +} + +func calculateCPUOverhead(cpuM int64) int64 { + var cpuOverHead int64 + + // Calculate overhead for each threshold + for _, t := range thresholds { + if cpuM >= t.cores { + cpuOverHead += int64(1000 * t.overhead) + } + } + + // Additional overhead for CPU > 4 cores (0.25%) + if cpuM > 4000 { + cpuOverHead += int64(float64(cpuM-4000) * 0.0025) + } + + return cpuOverHead +} + +func NewInstanceType(ctx context.Context, info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType, kc *v1alpha1.KubeletConfiguration, region string, systemDisk *v1alpha1.SystemDisk, offerings cloudprovider.Offerings, clusterCNI string) *cloudprovider.InstanceType { @@ -71,12 +116,15 @@ func NewInstanceType(ctx context.Context, overhead corev1.ResourceList, Offerings: offerings, Capacity: computeCapacity(ctx, info, kc.MaxPods, kc.PodsPerCore, systemDisk, 
clusterCNI), Overhead: &cloudprovider.InstanceTypeOverhead{ - // Follow overhead will be merged, so we can set only one overhead totally - KubeReserved: overhead, + KubeReserved: corev1.ResourceList{}, SystemReserved: corev1.ResourceList{}, EvictionThreshold: corev1.ResourceList{}, }, } + + // KubeReserved, SystemReserved, and EvictionThreshold are merged downstream, so it is enough to set the total overhead in KubeReserved alone + it.Overhead.KubeReserved = calculateResourceOverhead(it.Capacity.Pods().Value(), + it.Capacity.Cpu().MilliValue(), extractMemory(info).Value()/MiBByteRatio) if it.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)))) == nil { it.Capacity[v1alpha1.ResourcePrivateIPv4Address] = *privateIPv4Address(info) } @@ -206,9 +254,13 @@ func cpu(info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceT return resources.Quantity(fmt.Sprint(*info.CpuCoreCount)) } -func memory(ctx context.Context, info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType) *resource.Quantity { +func extractMemory(info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType) *resource.Quantity { sizeInGib := tea.Float32Value(info.MemorySize) - mem := resources.Quantity(fmt.Sprintf("%fGi", sizeInGib)) + return resources.Quantity(fmt.Sprintf("%fGi", sizeInGib)) +} + +func memory(ctx context.Context, info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType) *resource.Quantity { + mem := extractMemory(info) if mem.IsZero() { return mem }