From 8ca3afc35b4ca895895525f3a084571d32363a1f Mon Sep 17 00:00:00 2001 From: michael mccune Date: Mon, 19 Dec 2022 13:41:04 -0500 Subject: [PATCH 1/3] update clusterapi readme with table of contents this change will make navigating the readme easier for users. --- .../cloudprovider/clusterapi/README.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md index fb1811f5bd69..f4135e425034 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/README.md +++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md @@ -5,6 +5,29 @@ the [cluster-api project](https://github.com/kubernetes-sigs/cluster-api) to manage the provisioning and de-provisioning of nodes within a Kubernetes cluster. +## Table of Contents: + +* [Kubernetes Version](#kubernetes-version) +* [Starting the Autoscaler](#starting-the-autoscaler) +* [Configuring node group auto discovery](#configuring-node-group-auto-discovery) +* [Connecting cluster-autoscaler to Cluster API management and workload Clusters](#connecting-cluster-autoscaler-to-cluster-api-management-and-workload-clusters) + * [Autoscaler running in a joined cluster using service account credentials](#autoscaler-running-in-a-joined-cluster-using-service-account-credentials) + * [Autoscaler running in workload cluster using service account credentials, with separate management cluster](#autoscaler-running-in-workload-cluster-using-service-account-credentials-with-separate-management-cluster) + * [Autoscaler running in management cluster using service account credentials, with separate workload cluster](#autoscaler-running-in-management-cluster-using-service-account-credentials-with-separate-workload-cluster) + * [Autoscaler running anywhere, with separate kubeconfigs for management and workload clusters](#autoscaler-running-anywhere-with-separate-kubeconfigs-for-management-and-workload-clusters) + * [Autoscaler 
running anywhere, with a common kubeconfig for management and workload clusters](#autoscaler-running-anywhere-with-a-common-kubeconfig-for-management-and-workload-clusters) +* [Enabling Autoscaling](#enabling-autoscaling) + * [Scale from zero support](#scale-from-zero-support) + * [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero) + * [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero) +* [Specifying a Custom Resource Group](#specifying-a-custom-resource-group) +* [Specifying a Custom Resource Version](#specifying-a-custom-resource-version) +* [Sample manifest](#sample-manifest) + * [A note on permissions](#a-note-on-permissions) +* [Autoscaling with ClusterClass and Managed Topologies](#autoscaling-with-clusterclass-and-managed-topologies) +* [Special note on GPU instances](#special-note-on-gpu-instances) + + ## Kubernetes Version The cluster-api provider requires Kubernetes v1.16 or greater to run the From 6b80a7134af3e805683e8147f6e734df29fe77f0 Mon Sep 17 00:00:00 2001 From: michael mccune Date: Mon, 19 Dec 2022 14:14:46 -0500 Subject: [PATCH 2/3] add a note to clusterapi readme about ignored labels this change adds a section to the readme that provides advice for clusterapi users about which labels they might want to ignore when using the balance similar node groups flag on various cloud providers. --- .../cloudprovider/clusterapi/README.md | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md index f4135e425034..56ba6a9cf4a0 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/README.md +++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md @@ -26,6 +26,7 @@ cluster. 
* [A note on permissions](#a-note-on-permissions) * [Autoscaling with ClusterClass and Managed Topologies](#autoscaling-with-clusterclass-and-managed-topologies) * [Special note on GPU instances](#special-note-on-gpu-instances) +* [Special note on balancing similar node groups](#special-note-on-balancing-similar-node-groups) ## Kubernetes Version @@ -359,3 +360,60 @@ CAPI cloudprovider, the label format is as follows: `cluster-api/accelerator=<gpu-type>` `<gpu-type>` is arbitrary. + +## Special note on balancing similar node groups + +The Cluster Autoscaler feature to enable balancing similar node groups +(activated with the `--balance-similar-node-groups` flag) is a powerful and +popular feature. When enabled, the Cluster Autoscaler will attempt to create +new nodes by adding them in a manner that balances the creation between +similar node groups. With Cluster API, these node groups correspond directly +to the scalable resources (usually MachineDeployments and MachineSets) associated +with the nodes in question. In order for the nodes of these scalable resources +to be considered similar by the Cluster Autoscaler, they must have the same +capacity, labels, and taints for the nodes which will be created from them. + +To assist the Cluster Autoscaler in determining which node groups are +similar, the command line flags `--balancing-ignore-label` and +`--balancing-label` are provided. For an expanded discussion about balancing +similar node groups and the options which are available, please see the +[Cluster Autoscaler FAQ](../../FAQ.md). + +Because Cluster API can address many different cloud providers, it is important +to configure the balancing labels to ignore provider-specific labels which +are used for carrying zonal information on Kubernetes nodes. The Cluster +Autoscaler implementation for Cluster API does not assume any labels (aside from +the [well-known Kubernetes labels](https://kubernetes.io/docs/reference/labels-annotations-taints/)) +to be ignored when running. 
Users must configure their Cluster Autoscaler deployment +to ignore labels which might be different between nodes, but which do not +otherwise affect node behavior or size (for example, when two MachineDeployments +are the same except for their deployment zones). The Cluster API community has +decided not to carry cloud-provider-specific labels in the Cluster Autoscaler +to reduce the possibility of labels clashing between providers. Additionally, +the community has agreed to promote documentation and the use of the `--balancing-ignore-label` +flag as the preferred method of deployment to reduce the need for ongoing +maintenance on the Cluster Autoscaler when new providers are added or updated. +For further context around this decision, please see the +[Cluster API Deep Dive into Cluster Autoscaler Node Group Balancing discussion from 2022-09-12](https://www.youtube.com/watch?v=jbhca_9oPuQ&t=5s). + +The following table shows some of the most common labels used by cloud providers +to designate regional or zonal information on Kubernetes nodes. It is shared +here as a reference for users who might be deploying on these infrastructures. 
+ +| Cloud Provider | Label to ignore | Notes | +| --- | --- | --- | +| Alibaba Cloud | `topology.diskplugin.csi.alibabacloud.com/zone` | Used by the Alibaba Cloud CSI driver as a target for persistent volume node affinity | +| AWS | `alpha.eksctl.io/instance-id` | Used by `eksctl` to identify instances | +| AWS | `alpha.eksctl.io/nodegroup-name` | Used by `eksctl` to identify node group names | +| AWS | `eks.amazonaws.com/nodegroup` | Used by EKS to identify node groups | +| AWS | `k8s.amazonaws.com/eniConfig` | Used by the AWS CNI for custom networking | +| AWS | `lifecycle` | Used by AWS as a label for spot instances | +| AWS | `topology.ebs.csi.aws.com/zone` | Used by the AWS EBS CSI driver as a target for persistent volume node affinity | +| Azure | `topology.disk.csi.azure.com/zone` | Used as the topology key by the Azure Disk CSI driver | +| Azure | `agentpool` | Legacy label used to specify to which Azure node pool a particular node belongs | +| Azure | `kubernetes.azure.com/agentpool` | Used by AKS to identify to which node pool a particular node belongs | +| GCE | `topology.gke.io/zone` | Used to specify the zone of the node | +| IBM Cloud | `ibm-cloud.kubernetes.io/worker-id` | Used by the IBM Cloud Cloud Controller Manager to identify the node | +| IBM Cloud | `vpc-block-csi-driver-labels` | Used by the IBM Cloud CSI driver as a target for persistent volume node affinity | +| IBM Cloud | `ibm-cloud.kubernetes.io/vpc-instance-id` | Used when a VPC is in use on IBM Cloud | + From 955396e857c52fd6de99202d5d994211427a30d9 Mon Sep 17 00:00:00 2001 From: michael mccune Date: Mon, 19 Dec 2022 14:21:42 -0500 Subject: [PATCH 3/3] remove clusterapi nodegroupset processor as discussed with the cluster api community[0], the nodegroupset processor is being removed from the clusterapi provider implementation in favor of instructing our community on the use of the --balancing-ignore-label flag. 
due to the wide variety of provider infrastructures that clusterapi can be deployed on, we would prefer to not encode all of these labels in the autoscaler itself. see the linked recording for more information. [0] https://www.youtube.com/watch?v=jbhca_9oPuQ --- cluster-autoscaler/main.go | 2 - .../nodegroupset/clusterapi_nodegroups.go | 47 --------- .../clusterapi_nodegroups_test.go | 98 ------------------- 3 files changed, 147 deletions(-) delete mode 100644 cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups.go delete mode 100644 cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups_test.go diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index d1562adefbcd..41f7ee9b9235 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -416,8 +416,6 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter } else if autoscalingOptions.CloudProviderName == cloudprovider.GceProviderName { nodeInfoComparatorBuilder = nodegroupset.CreateGceNodeInfoComparator opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewAnnotationNodeInfoProvider(nodeInfoCacheExpireTime) - } else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName { - nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator } nodeInfoComparator = nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels, autoscalingOptions.NodeGroupSetRatios) } diff --git a/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups.go deleted file mode 100644 index 64188aea7e88..000000000000 --- a/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups.go +++ /dev/null @@ -1,47 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nodegroupset - -import ( - "k8s.io/autoscaler/cluster-autoscaler/config" - schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" -) - -// CreateClusterAPINodeInfoComparator returns a comparator that checks if two nodes should be considered -// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar, -// even if they have different infrastructure provider-specific labels. -func CreateClusterAPINodeInfoComparator(extraIgnoredLabels []string, ratioOpts config.NodeGroupDifferenceRatios) NodeInfoComparator { - capiIgnoredLabels := map[string]bool{ - "topology.ebs.csi.aws.com/zone": true, // this is a label used by the AWS EBS CSI driver as a target for Persistent Volume Node Affinity - "topology.diskplugin.csi.alibabacloud.com/zone": true, // this is a label used by the Alibaba Cloud CSI driver as a target for Persistent Volume Node Affinity - "ibm-cloud.kubernetes.io/worker-id": true, // this is a label used by the IBM Cloud Cloud Controler Manager - "vpc-block-csi-driver-labels": true, // this is a label used by the IBM Cloud CSI driver as a target for Persisten Volume Node Affinity - - } - - for k, v := range BasicIgnoredLabels { - capiIgnoredLabels[k] = v - } - - for _, k := range extraIgnoredLabels { - capiIgnoredLabels[k] = true - } - - return func(n1, n2 *schedulerframework.NodeInfo) bool { - return IsCloudProviderNodeInfoSimilar(n1, n2, capiIgnoredLabels, ratioOpts) - } -} diff --git a/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups_test.go 
b/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups_test.go deleted file mode 100644 index df6969223b12..000000000000 --- a/cluster-autoscaler/processors/nodegroupset/clusterapi_nodegroups_test.go +++ /dev/null @@ -1,98 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nodegroupset - -import ( - "testing" - - "k8s.io/autoscaler/cluster-autoscaler/config" - "k8s.io/autoscaler/cluster-autoscaler/context" - . "k8s.io/autoscaler/cluster-autoscaler/utils/test" -) - -func TestIsClusterAPINodeInfoSimilar(t *testing.T) { - comparator := CreateClusterAPINodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{}) - node1 := BuildTestNode("node1", 1000, 2000) - node2 := BuildTestNode("node2", 1000, 2000) - - for _, tc := range []struct { - description string - label string - value1 string - value2 string - removeOneLabel bool - }{ - { - description: "topology.ebs.csi.aws.com/zone empty value", - label: "topology.ebs.csi.aws.com/zone", - value1: "", - value2: "", - removeOneLabel: false, - }, - { - description: "topology.ebs.csi.aws.com/zone different values", - label: "topology.ebs.csi.aws.com/zone", - value1: "foo", - value2: "bar", - removeOneLabel: false, - }, - { - description: "topology.diskplugin.csi.alibabacloud.com/zone different values", - label: "topology.diskplugin.csi.alibabacloud.com/zone", - value1: "foo", - value2: "bar", - removeOneLabel: false, - }, - { - description: 
"ibm-cloud.kubernetes.io/worker-id different values", - label: "ibm-cloud.kubernetes.io/worker-id", - value1: "foo", - value2: "bar", - removeOneLabel: false, - }, - { - description: "vpc-block-csi-driver-labels different values", - label: "vpc-block-csi-driver-labels", - value1: "foo", - value2: "bar", - removeOneLabel: false, - }, - { - description: "topology.ebs.csi.aws.com/zone one node labeled", - label: "topology.ebs.csi.aws.com/zone", - value1: "foo", - value2: "bar", - removeOneLabel: true, - }, - } { - t.Run(tc.description, func(t *testing.T) { - node1.ObjectMeta.Labels[tc.label] = tc.value1 - node2.ObjectMeta.Labels[tc.label] = tc.value2 - if tc.removeOneLabel { - delete(node2.ObjectMeta.Labels, tc.label) - } - checkNodesSimilar(t, node1, node2, comparator, true) - }) - } -} - -func TestFindSimilarNodeGroupsClusterAPIBasic(t *testing.T) { - context := &context.AutoscalingContext{} - ni1, ni2, ni3 := buildBasicNodeGroups(context) - processor := &BalancingNodeGroupSetProcessor{Comparator: CreateClusterAPINodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})} - basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3) -}