Skip to content

Commit

Permalink
fix: CA node recognizing fake nodegroups
Browse files Browse the repository at this point in the history
- add provider ID to nodes in the format `kwok:<node-name>`
- fix invalid `KwokManagedAnnotation`
- sanitize template nodes (remove `resourceVersion` etc.,)
- not sanitizing the node leads to error during creation of new nodes
- abstract code to get NG name into a separate function `getNGNameFromAnnotation`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>
  • Loading branch information
vadasambar committed Jun 7, 2023
1 parent 094f1b0 commit a65dee8
Showing 1 changed file with 52 additions and 38 deletions.
90 changes: 52 additions & 38 deletions cluster-autoscaler/cloudprovider/kwok/kwok.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ const (
// KwokManagedAnnotation is the default annotation
// that kwok manages to decide if it should manage
// a node it sees in the cluster
KwokManagedAnnotation = "kwok.x-k8s.io/node=fake"
KwokManagedAnnotation = "kwok.x-k8s.io/node"

// // GPULabel is the label added to nodes with GPU resource.
// GPULabel = "cloud.google.com/gke-accelerator"
Expand All @@ -85,8 +85,7 @@ type KwokCloudProvider struct {
// kubeClient is to be used only for create, delete and update
kubeClient *kubeclient.Clientset
// lister is to be used for get and list
lister kube_util.NodeLister
provider string
lister kube_util.NodeLister
}

// Name returns name of the cloud provider.
Expand All @@ -107,17 +106,13 @@ func (kwok *KwokCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
func (kwok *KwokCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
// Skip nodes that are not managed by kwok cloud provider.
if !strings.HasPrefix(node.Spec.ProviderID, ProviderName) {
klog.V(2).Infof("ignoring node '%s' because it is not managed by kwok", node.GetName())
return nil, nil
}

annotations := node.GetAnnotations()
nodeGroupName := annotations[NGNameAnnotation]
if nodeGroupName == "" {
return nil, fmt.Errorf("nodegroup name annotation '%s' not found on the node '%s'", NGNameAnnotation, node.GetName())
}

for _, nodeGroup := range kwok.nodeGroups {
if nodeGroup.name == nodeGroupName {
if nodeGroup.name == getNGNameFromAnnotation(node) {
klog.V(5).Infof("found nodegroup '%s' for node '%s'", nodeGroup.name, node.GetName())
return nodeGroup, nil
}
}
Expand Down Expand Up @@ -176,19 +171,22 @@ func (kwok *KwokCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
func (kwok *KwokCloudProvider) Refresh() error {
nodeList, err := kwok.lister.List()
if err != nil {
return err
}

ngs := []*NodeGroup{}
for _, no := range nodeList {
ng := parseAnnotationsToNodegroup(no)
ng.kubeClient = kwok.kubeClient
ngs = append(ngs, ng)
}
// TODO(vadasambar): causes CA to not recognize kwok nodegroups
// needs better implementation
// nodeList, err := kwok.lister.List()
// if err != nil {
// return err
// }

// ngs := []*NodeGroup{}
// for _, no := range nodeList {
// ng := parseAnnotationsToNodegroup(no)
// ng.kubeClient = kwok.kubeClient
// ngs = append(ngs, ng)
// }

kwok.nodeGroups = ngs
// kwok.nodeGroups = ngs

return nil
}
Expand Down Expand Up @@ -254,9 +252,10 @@ func (nodeGroup *NodeGroup) IncreaseSize(delta int) error {
for i := 0; i < delta; i++ {
node := schedNode.Node()
node.Name = fmt.Sprintf("%s-%s", nodeGroup.name, rand.String(5))
node.Spec.ProviderID = fmt.Sprintf("kwok:%s", node.Name)
_, err := nodeGroup.kubeClient.CoreV1().Nodes().Create(context.Background(), node, v1.CreateOptions{})
if err != nil {
return fmt.Errorf("couldn't create new node '%s'", node.Name)
return fmt.Errorf("couldn't create new node '%s': %v", node.Name, err)
}
}

Expand Down Expand Up @@ -319,7 +318,7 @@ func (nodeGroup *NodeGroup) getNodeNamesForNodeGroup() ([]string, error) {
}

for _, no := range nodeList {
if no.GetAnnotations()[NGNameAnnotation] == nodeGroup.Id() {
if getNGNameFromAnnotation(no) == nodeGroup.Id() {
names = append(names, no.GetName())
}
}
Expand Down Expand Up @@ -484,7 +483,9 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku

ngs := []*NodeGroup{}
for _, no := range nodeTemplates {
sanitizeNode(no)
no.Annotations[KwokManagedAnnotation] = "fake"
no.Spec.ProviderID = fmt.Sprintf("kwok:%s", no.GetName())
ng := parseAnnotationsToNodegroup(no)
ng.kubeClient = kubeClient
ng.lister = lister
Expand All @@ -495,21 +496,14 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku

}

func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
ngName := no.GetAnnotations()[NGNameAnnotation]

if ngName == "" {
if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" {
// add prefix to make it clear that this is a different nodegroup
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"])
} else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" {
// add prefix to make it clear that this is a different nodegroup
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"])
} else {
klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName())
}
}
func sanitizeNode(no *apiv1.Node) {
no.ResourceVersion = ""
no.Generation = 0
no.UID = ""
no.CreationTimestamp = v1.Time{}
}

func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
min := 0
max := 200
target := min
Expand Down Expand Up @@ -550,10 +544,30 @@ func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
no.Name = fmt.Sprintf("kwok-fake-%s", no.GetName())

return &NodeGroup{
name: ngName,
name: getNGNameFromAnnotation(no),
minSize: min,
maxSize: max,
targetSize: target,
nodeTemplate: no,
}
}

func getNGNameFromAnnotation(no *apiv1.Node) string {
ngName := no.GetAnnotations()[NGNameAnnotation]

if ngName != "" {
return ngName
}

if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" {
// add prefix to make it clear that this is a different nodegroup
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"])
} else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" {
// add prefix to make it clear that this is a different nodegroup
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"])
} else {
klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName())
}

return ngName
}

0 comments on commit a65dee8

Please sign in to comment.