diff --git a/cluster-autoscaler/cloudprovider/kwok/kwok.go b/cluster-autoscaler/cloudprovider/kwok/kwok.go index 2ace5b0f22f8..5113515f7ce5 100644 --- a/cluster-autoscaler/cloudprovider/kwok/kwok.go +++ b/cluster-autoscaler/cloudprovider/kwok/kwok.go @@ -62,7 +62,7 @@ const ( // KwokManagedAnnotation is the default annotation // that kwok manages to decide if it should manage // a node it sees in the cluster - KwokManagedAnnotation = "kwok.x-k8s.io/node=fake" + KwokManagedAnnotation = "kwok.x-k8s.io/node" // // GPULabel is the label added to nodes with GPU resource. // GPULabel = "cloud.google.com/gke-accelerator" @@ -85,8 +85,7 @@ type KwokCloudProvider struct { // kubeClient is to be used only for create, delete and update kubeClient *kubeclient.Clientset // lister is to be used for get and list - lister kube_util.NodeLister - provider string + lister kube_util.NodeLister } // Name returns name of the cloud provider. @@ -107,17 +106,13 @@ func (kwok *KwokCloudProvider) NodeGroups() []cloudprovider.NodeGroup { func (kwok *KwokCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { // Skip nodes that are not managed by kwok cloud provider. if !strings.HasPrefix(node.Spec.ProviderID, ProviderName) { + klog.V(2).Infof("ignoring node '%s' because it is not managed by kwok", node.GetName()) return nil, nil } - annotations := node.GetAnnotations() - nodeGroupName := annotations[NGNameAnnotation] - if nodeGroupName == "" { - return nil, fmt.Errorf("nodegroup name annotation '%s' not found on the node '%s'", NGNameAnnotation, node.GetName()) - } - for _, nodeGroup := range kwok.nodeGroups { - if nodeGroup.name == nodeGroupName { + if nodeGroup.name == getNGNameFromAnnotation(node) { + klog.V(5).Infof("found nodegroup '%s' for node '%s'", nodeGroup.name, node.GetName()) return nodeGroup, nil } } @@ -176,19 +171,22 @@ func (kwok *KwokCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider // Refresh is called before every main loop and can be used to dynamically update cloud provider state. // In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh(). func (kwok *KwokCloudProvider) Refresh() error { - nodeList, err := kwok.lister.List() - if err != nil { - return err - } - ngs := []*NodeGroup{} - for _, no := range nodeList { - ng := parseAnnotationsToNodegroup(no) - ng.kubeClient = kwok.kubeClient - ngs = append(ngs, ng) - } + // TODO(vadasambar): causes CA to not recognize kwok nodegroups + // needs better implementation + // nodeList, err := kwok.lister.List() + // if err != nil { + // return err + // } + + // ngs := []*NodeGroup{} + // for _, no := range nodeList { + // ng := parseAnnotationsToNodegroup(no) + // ng.kubeClient = kwok.kubeClient + // ngs = append(ngs, ng) + // } - kwok.nodeGroups = ngs + // kwok.nodeGroups = ngs return nil } @@ -254,9 +252,10 @@ func (nodeGroup *NodeGroup) IncreaseSize(delta int) error { for i := 0; i < delta; i++ { node := schedNode.Node() node.Name = fmt.Sprintf("%s-%s", nodeGroup.name, rand.String(5)) + node.Spec.ProviderID = fmt.Sprintf("kwok:%s", node.Name) _, err := nodeGroup.kubeClient.CoreV1().Nodes().Create(context.Background(), node, v1.CreateOptions{}) if err != nil { - return fmt.Errorf("couldn't create new node '%s'", node.Name) + return fmt.Errorf("couldn't create new node '%s': %v", node.Name, err) } } @@ -319,7 +318,7 @@ func (nodeGroup *NodeGroup) getNodeNamesForNodeGroup() ([]string, error) { } for _, no := range nodeList { - if no.GetAnnotations()[NGNameAnnotation] == nodeGroup.Id() { + if getNGNameFromAnnotation(no) == nodeGroup.Id() { names = append(names, no.GetName()) } } @@ -484,7 +483,9 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku ngs := []*NodeGroup{} for _, no := range nodeTemplates { + sanitizeNode(no) no.Annotations[KwokManagedAnnotation] = "fake" + no.Spec.ProviderID = fmt.Sprintf("kwok:%s", no.GetName()) ng := parseAnnotationsToNodegroup(no) ng.kubeClient = kubeClient ng.lister = lister @@ -495,21 +496,14 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku } -func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup { - ngName := no.GetAnnotations()[NGNameAnnotation] - - if ngName == "" { - if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" { - // add prefix to make it clear that this is a different nodegroup - ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"]) - } else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" { - // add prefix to make it clear that this is a different nodegroup - ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"]) - } else { - klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName()) - } - } +func sanitizeNode(no *apiv1.Node) { + no.ResourceVersion = "" + no.Generation = 0 + no.UID = "" + no.CreationTimestamp = v1.Time{} +} +func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup { min := 0 max := 200 target := min @@ -550,10 +544,30 @@ func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup { no.Name = fmt.Sprintf("kwok-fake-%s", no.GetName()) return &NodeGroup{ - name: ngName, + name: getNGNameFromAnnotation(no), minSize: min, maxSize: max, targetSize: target, nodeTemplate: no, } } + +func getNGNameFromAnnotation(no *apiv1.Node) string { + ngName := no.GetAnnotations()[NGNameAnnotation] + + if ngName != "" { + return ngName + } + + if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" { + // add prefix to make it clear that this is a different nodegroup + ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"]) + } else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" { + // add prefix to make it clear that this is a different nodegroup + ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"]) + } else { + klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName()) + } + + return ngName +}