diff --git a/README.md b/README.md index 34d8ec8..f3eade0 100644 --- a/README.md +++ b/README.md @@ -56,22 +56,23 @@ Follow Kubestitute documentation for Helm deployment [here](./helm/kubestitute). ### Optional args The kubestitute container takes as argument the parameters below. -| Key | Description | Default | -| ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------ | -| clusterautoscaler-namespace | The namespace the clusterautoscaler belongs to. | kube-system | -| clusterautoscaler-status-name | The names of the clusterautoscaler status configmap. | cluster-autoscaler-status | -| cluster-autoscaler-priority-expander-config-map | The name of the clusterautoscaler priority expander config map. | cluster-autoscaler-priority-expander | -| priority-expander-enabled | Is the PriorityExpander controller enabled. | `false` | -| priority-expander-namespace | The namespace the _unique_ priority expander object belongs to. | kubestitute-system | -| priority-expander-name | The only accepted name for the priority expander object. | priority-expander-default | -| dev | Enable dev mode for logging. | `false` | -| v | Logs verbosity. 0 => panic, 1 => error, 2 => warning, 3 => info, 4 => debug | 3 | -| asg-poll-interval | AutoScaling Groups polling interval (used to generate custom metrics about ASGs). | 30 | -| eviction-timeout | The timeout in seconds for pods eviction on Instance deletion. | 300 | -| instances-max-concurrent-reconciles | The maximum number of concurrent Reconciles which can be run for Instances. | 10 | -| metrics-bind-address | The address the metric endpoint binds to. | :8080 | -| health-probe-bind-address | The address the probe endpoint binds to. | :8081 | -| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` | +| Key | Description | Default | +| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------ | +| clusterautoscaler-namespace | The namespace the clusterautoscaler belongs to. | kube-system | +| clusterautoscaler-status-name | The names of the clusterautoscaler status configmap. | cluster-autoscaler-status | +| cluster-autoscaler-priority-expander-config-map | The name of the clusterautoscaler priority expander config map. | cluster-autoscaler-priority-expander | +| clusterautoscaler-status-legacy-format | Set if the clusterautoscaler status configmap is formatted the legacy readable format, used by cluster-autoscaler up to version 1.29. | `false` | +| priority-expander-enabled | Is the PriorityExpander controller enabled. | `false` | +| priority-expander-namespace | The namespace the _unique_ priority expander object belongs to. | kubestitute-system | +| priority-expander-name | The only accepted name for the priority expander object. | priority-expander-default | +| dev | Enable dev mode for logging. | `false` | +| v | Logs verbosity. 0 => panic, 1 => error, 2 => warning, 3 => info, 4 => debug | 3 | +| asg-poll-interval | AutoScaling Groups polling interval (used to generate custom metrics about ASGs). | 30 | +| eviction-timeout | The timeout in seconds for pods eviction on Instance deletion. 
| 300 | +| instances-max-concurrent-reconciles | The maximum number of concurrent Reconciles which can be run for Instances. | 10 | +| metrics-bind-address | The address the metric endpoint binds to. | :8080 | +| health-probe-bind-address | The address the probe endpoint binds to. | :8081 | +| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` | ## CustomResourceDefinitions diff --git a/api/v1alpha1/scheduler_types.go b/api/v1alpha1/scheduler_types.go index fd6b42d..2541f6f 100644 --- a/api/v1alpha1/scheduler_types.go +++ b/api/v1alpha1/scheduler_types.go @@ -161,7 +161,8 @@ type IntOrArithmeticOperation struct { // It is based on ASG health status. type Field string -// All Field constants +// All Field constants. +// LongNotStarted is deprecated and will always be 0. const ( FieldReady Field = "Ready" FieldUnready Field = "Unready" diff --git a/controllers/priorityexpander_controller.go b/controllers/priorityexpander_controller.go index 75dd741..cc67f95 100644 --- a/controllers/priorityexpander_controller.go +++ b/controllers/priorityexpander_controller.go @@ -44,11 +44,12 @@ import ( ) type PriorityExpanderReconcilerConfiguration struct { - ClusterAutoscalerNamespace string - ClusterAutoscalerStatusName string - ClusterAutoscalerPEConfigMapName string - PriorityExpanderNamespace string - PriorityExpanderName string + ClusterAutoscalerNamespace string + ClusterAutoscalerStatusName string + ClusterAutoscalerStatusLegacyFormat bool + ClusterAutoscalerPEConfigMapName string + PriorityExpanderNamespace string + PriorityExpanderName string } type PriorityExpanderReconciler struct { @@ -125,20 +126,30 @@ func (r *PriorityExpanderReconciler) Reconcile(ctx context.Context, req ctrl.Req } // ... and parse it. 
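// Sketch (assumption, not part of this patch): the format-selection branch added
// just below is duplicated in the Scheduler controller; it could be factored into
// the clusterautoscaler package. The helper name ParseStatus is hypothetical, but
// it only calls functions introduced or kept by this change.
func ParseStatus(data string, legacyFormat bool) (*ClusterAutoscalerStatus, error) {
	if legacyFormat {
		// Human-readable status format, emitted by cluster-autoscaler up to v1.29.
		return ParseReadableStatus(data), nil
	}
	// YAML status format emitted by newer cluster-autoscaler releases.
	return ParseYamlStatus(data)
}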
- status := clusterautoscaler.ParseReadableString(readableStatus) + var status *clusterautoscaler.ClusterAutoscalerStatus + if !r.Configuration.ClusterAutoscalerStatusLegacyFormat { + s, err := clusterautoscaler.ParseYamlStatus(readableStatus) + if err != nil { + log.Error(err, "Unable to parse status configmap yaml content") + return ctrl.Result{}, fmt.Errorf("unable to parse status configmap yaml content: %w", err) + } + status = s + } else { + status = clusterautoscaler.ParseReadableStatus(readableStatus) + } - var oroot = map[string]map[string]int32{} + oroot := map[string]map[string]int32{} for _, node := range status.NodeGroups { oroot[node.Name] = make(map[string]int32) - oroot[node.Name]["CloudProviderTarget"] = node.Health.CloudProviderTarget - oroot[node.Name]["Ready"] = node.Health.Ready - oroot[node.Name]["Unready"] = node.Health.Unready - oroot[node.Name]["NotStarted"] = node.Health.NotStarted - oroot[node.Name]["LongNotStarted"] = node.Health.LongNotStarted - oroot[node.Name]["Registered"] = node.Health.Registered - oroot[node.Name]["LongUnregistered"] = node.Health.LongUnregistered - oroot[node.Name]["MinSize"] = node.Health.MinSize - oroot[node.Name]["MaxSize"] = node.Health.MaxSize + oroot[node.Name]["CloudProviderTarget"] = int32(node.Health.CloudProviderTarget) + oroot[node.Name]["Ready"] = int32(node.Health.NodeCounts.Registered.Ready) + oroot[node.Name]["Unready"] = int32(node.Health.NodeCounts.Registered.Unready.Total) + oroot[node.Name]["NotStarted"] = int32(node.Health.NodeCounts.Registered.NotStarted) + oroot[node.Name]["LongNotStarted"] = 0 + oroot[node.Name]["Registered"] = int32(node.Health.NodeCounts.Registered.Total) + oroot[node.Name]["LongUnregistered"] = int32(node.Health.NodeCounts.LongUnregistered) + oroot[node.Name]["MinSize"] = int32(node.Health.MinSize) + oroot[node.Name]["MaxSize"] = int32(node.Health.MaxSize) } // Create new PriorityExpander template and parse it @@ -169,7 +180,6 @@ func (r *PriorityExpanderReconciler) Reconcile(ctx context.Context, req ctrl.Req } op, err := ctrl.CreateOrUpdate(ctx, r.Client, &pecm, func() error { - pecm.Data = map[string]string{ "priorities": buf.String(), } diff --git a/controllers/scheduler_controller.go b/controllers/scheduler_controller.go index 470f219..b476fcc 100644 --- a/controllers/scheduler_controller.go +++ b/controllers/scheduler_controller.go @@ -55,8 +55,9 @@ const ( // SchedulerReconcilerConfiguration wraps configuration for the SchedulerReconciler. 
type SchedulerReconcilerConfiguration struct { - ClusterAutoscalerNamespace string - ClusterAutoscalerStatusName string + ClusterAutoscalerNamespace string + ClusterAutoscalerStatusName string + ClusterAutoscalerStatusLegacyFormat bool } // SchedulerReconciler reconciles a Scheduler object @@ -138,13 +139,23 @@ func (r *SchedulerReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } // Parse it and retrieve NodeGroups from targets and fallbacks - status := clusterautoscaler.ParseReadableString(readableStatus) + var status *clusterautoscaler.ClusterAutoscalerStatus + if !r.Configuration.ClusterAutoscalerStatusLegacyFormat { + s, err := clusterautoscaler.ParseYamlStatus(readableStatus) + if err != nil { + log.Error(err, "Unable to parse status configmap yaml content") + return ctrl.Result{}, fmt.Errorf("unable to parse status configmap yaml content: %w", err) + } + status = s + } else { + status = clusterautoscaler.ParseReadableStatus(readableStatus) + } asgTargets := scheduler.Spec.ASGTargets if len(asgTargets) == 0 { asgTargets = []string{scheduler.Spec.ASGTarget} } - targetNodeGroups := make([]clusterautoscaler.NodeGroup, 0, len(asgTargets)) + targetNodeGroups := make([]clusterautoscaler.NodeGroupStatus, 0, len(asgTargets)) for _, target := range asgTargets { targetNodeGroup := clusterautoscaler.GetNodeGroupWithName(status.NodeGroups, target) if targetNodeGroup == nil { @@ -162,12 +173,12 @@ func (r *SchedulerReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Update target statuses for i := range targetNodeGroups { - for _, s := range []clusterautoscaler.ScaleUpStatus{ - clusterautoscaler.ScaleUpNeeded, - clusterautoscaler.ScaleUpNotNeeded, - clusterautoscaler.ScaleUpInProgress, - clusterautoscaler.ScaleUpNoActivity, - clusterautoscaler.ScaleUpBackoff, + for _, s := range []clusterautoscaler.ClusterAutoscalerConditionStatus{ + clusterautoscaler.ClusterAutoscalerNeeded, + clusterautoscaler.ClusterAutoscalerNotNeeded, + clusterautoscaler.ClusterAutoscalerInProgress, + clusterautoscaler.ClusterAutoscalerNoActivity, + clusterautoscaler.ClusterAutoscalerBackoff, } { targetNodeGroupStatus := metrics.SchedulerTargetNodeGroupStatus.With(prometheus.Labels{ "node_group_name": targetNodeGroups[i].Name, @@ -277,7 +288,7 @@ func (r *SchedulerReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( if down > 0 { scaleDownAllowed := false for i := range targetNodeGroups { - if targetNodeGroups[i].ScaleUp.Status != clusterautoscaler.ScaleUpBackoff { + if targetNodeGroups[i].ScaleUp.Status != clusterautoscaler.ClusterAutoscalerBackoff { scaleDownAllowed = true break } @@ -511,7 +522,7 @@ func getMatchedPolicy(m []matchedPolicy, p corev1alpha1.SchedulerPolicy) *matche // nodeGroupIntOrFieldValue returns the desired value matching IntOrField. // Field returns the NodeGroup Field value ans has priority over Int if a valid // Field is given. 
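// For orientation, the switch below reads the new status types as follows
// (LongNotStarted has no counterpart in the new format and always contributes 0):
//
//	Ready               -> ngs[i].Health.NodeCounts.Registered.Ready
//	Unready             -> ngs[i].Health.NodeCounts.Registered.Unready.Total
//	NotStarted          -> ngs[i].Health.NodeCounts.Registered.NotStarted
//	Registered          -> ngs[i].Health.NodeCounts.Registered.Total
//	LongUnregistered    -> ngs[i].Health.NodeCounts.LongUnregistered
//	CloudProviderTarget -> ngs[i].Health.CloudProviderTarget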
-func nodeGroupIntOrFieldValue(ngs []clusterautoscaler.NodeGroup, iof corev1alpha1.IntOrField) int32 { +func nodeGroupIntOrFieldValue(ngs []clusterautoscaler.NodeGroupStatus, iof corev1alpha1.IntOrField) int32 { if iof.FieldVal == nil { return iof.IntVal } @@ -520,31 +531,29 @@ func nodeGroupIntOrFieldValue(ngs []clusterautoscaler.NodeGroup, iof corev1alpha switch *iof.FieldVal { case corev1alpha1.FieldReady: for i := range ngs { - val += ngs[i].Health.Ready + val += int32(ngs[i].Health.NodeCounts.Registered.Ready) } case corev1alpha1.FieldUnready: for i := range ngs { - val += ngs[i].Health.Unready + val += int32(ngs[i].Health.NodeCounts.Registered.Unready.Total) } case corev1alpha1.FieldNotStarted: for i := range ngs { - val += ngs[i].Health.NotStarted + val += int32(ngs[i].Health.NodeCounts.Registered.NotStarted) } case corev1alpha1.FieldLongNotStarted: - for i := range ngs { - val += ngs[i].Health.LongNotStarted - } + // Field deprecated, do nothing. case corev1alpha1.FieldRegistered: for i := range ngs { - val += ngs[i].Health.Registered + val += int32(ngs[i].Health.NodeCounts.Registered.Total) } case corev1alpha1.FieldLongUnregistered: for i := range ngs { - val += ngs[i].Health.LongUnregistered + val += int32(ngs[i].Health.NodeCounts.LongUnregistered) } case corev1alpha1.FieldCloudProviderTarget: for i := range ngs { - val += ngs[i].Health.CloudProviderTarget + val += int32(ngs[i].Health.CloudProviderTarget) } } @@ -552,7 +561,7 @@ func nodeGroupIntOrFieldValue(ngs []clusterautoscaler.NodeGroup, iof corev1alpha } // matchPolicy returns if given NodeGroup match desired Scheduler policy. -func matchPolicy(ngs []clusterautoscaler.NodeGroup, policy corev1alpha1.SchedulerPolicy) bool { +func matchPolicy(ngs []clusterautoscaler.NodeGroupStatus, policy corev1alpha1.SchedulerPolicy) bool { left := nodeGroupIntOrFieldValue(ngs, policy.LeftOperand) right := nodeGroupIntOrFieldValue(ngs, policy.RightOperand) @@ -576,7 +585,7 @@ func matchPolicy(ngs []clusterautoscaler.NodeGroup, policy corev1alpha1.Schedule } // replicas returns the number of required replicas. 
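// Worked example (illustrative, using the fixture from the tests below where each
// node group reports Ready=1 and CloudProviderTarget=7): field operands are summed
// across all target node groups, so over two node groups the operation
// "CloudProviderTarget - Ready" evaluates to 14 - 2 = 12; negative results and
// divisions by zero yield 0.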
-func nodeGroupReplicas(ngs []clusterautoscaler.NodeGroup, operation corev1alpha1.IntOrArithmeticOperation) int32 { +func nodeGroupReplicas(ngs []clusterautoscaler.NodeGroupStatus, operation corev1alpha1.IntOrArithmeticOperation) int32 { if operation.OperationVal == nil { return operation.IntVal } diff --git a/controllers/scheduler_controller_test.go b/controllers/scheduler_controller_test.go index 1f456df..2b24b12 100644 --- a/controllers/scheduler_controller_test.go +++ b/controllers/scheduler_controller_test.go @@ -25,14 +25,17 @@ import ( "quortex.io/kubestitute/utils/clusterautoscaler" ) -var ng = clusterautoscaler.NodeGroup{ - Health: clusterautoscaler.NodeGroupHealth{ - Health: clusterautoscaler.Health{ - Ready: 1, - Unready: 2, - NotStarted: 3, - LongNotStarted: 4, - Registered: 5, +var ng = clusterautoscaler.NodeGroupStatus{ + Health: clusterautoscaler.NodeGroupHealthCondition{ + NodeCounts: clusterautoscaler.NodeCount{ + Registered: clusterautoscaler.RegisteredNodeCount{ + Total: 5, + Ready: 1, + NotStarted: 3, + Unready: clusterautoscaler.RegisteredUnreadyNodeCount{ + Total: 2, + }, + }, LongUnregistered: 6, }, CloudProviderTarget: 7, @@ -179,7 +182,7 @@ func Test_getMatchedPolicy(t *testing.T) { func Test_nodeGroupIntOrFieldValue(t *testing.T) { type args struct { - ngs []clusterautoscaler.NodeGroup + ngs []clusterautoscaler.NodeGroupStatus iof corev1alpha1.IntOrField } tests := []struct { @@ -190,7 +193,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, no int no field should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{}, }, want: 0, @@ -198,7 +201,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, no int no field should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{}, }, want: 0, @@ -207,7 +210,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, an int no field should return the int value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, }, @@ -217,7 +220,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, an int no field should return the int value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, }, @@ -227,7 +230,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, field Ready should return the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -238,7 +241,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field Ready should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -249,7 +252,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, field Unready should return the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ 
IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldUnready), @@ -260,7 +263,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field Unready should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldUnready), @@ -271,7 +274,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, field NotStarted should return the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), @@ -282,7 +285,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field NotStarted should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), @@ -291,31 +294,31 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { want: 6, }, { - name: "with 1 nodegroup, field LongNotStarted should return the desired value", + name: "with 1 nodegroup, field LongNotStarted should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldLongNotStarted), }, }, - want: 4, + want: 0, }, { - name: "with 2 nodegroups, field LongNotStarted should return twice the desired value", + name: "with 2 nodegroups, field LongNotStarted should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldLongNotStarted), }, }, - want: 8, + want: 0, }, { name: "with 1 nodegroup, field Registered should return the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldRegistered), @@ -326,7 +329,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field Registered should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldRegistered), @@ -337,7 +340,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, field LongUnregistered should return the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldLongUnregistered), @@ -348,7 +351,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field LongUnregistered should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldLongUnregistered), @@ -359,7 +362,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 1 nodegroup, field CloudProviderTarget should return the desired value", args: args{ - ngs: 
[]clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -370,7 +373,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { { name: "with 2 nodegroups, field CloudProviderTarget should return twice the desired value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, iof: corev1alpha1.IntOrField{ IntVal: 2, FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -390,7 +393,7 @@ func Test_nodeGroupIntOrFieldValue(t *testing.T) { func Test_matchPolicy(t *testing.T) { type args struct { - ngs []clusterautoscaler.NodeGroup + ngs []clusterautoscaler.NodeGroupStatus policy corev1alpha1.SchedulerPolicy } tests := []struct { @@ -401,7 +404,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, invalid operator should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ IntVal: 1, @@ -417,7 +420,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, invalid operator should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ IntVal: 1, @@ -433,7 +436,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from 1 / operator = / to field ready (1) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ IntVal: 1, @@ -449,7 +452,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from 2 / operator = / to field ready (1 * 2) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ IntVal: 2, @@ -465,7 +468,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field ready / operator >= / to field ready should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -481,7 +484,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field ready / operator >= / to field ready should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -497,7 +500,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field ready (1) / operator = / to field unready (2) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -513,7 +516,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field ready (1) / operator = / to field unready (2 * 2) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: 
corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldReady), @@ -529,7 +532,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field unready (2) / operator > / to field notstarted (3) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldUnready), @@ -545,7 +548,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field unready (2 * 2) / operator > / to field notstarted (3 * 2) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldUnready), @@ -559,9 +562,9 @@ func Test_matchPolicy(t *testing.T) { want: false, }, { - name: "with 1 nodegroup, from field notstarted (3) / operator > / to field longnotstarted (4) should fail", + name: "with 1 nodegroup, from field notstarted (3) / operator > / to field longnotstarted (0) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), @@ -575,9 +578,9 @@ func Test_matchPolicy(t *testing.T) { want: false, }, { - name: "with 2 nodegroups, from field notstarted (3 * 2) / operator > / to field longnotstarted (4 * 2) should fail", + name: "with 2 nodegroups, from field notstarted (3 * 2) / operator > / to field longnotstarted (0 * 2) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), @@ -593,7 +596,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field cloudProviderTarget (7) / operator <= / to field ready (1) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -609,7 +612,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field cloudProviderTarget (7 * 2) / operator <= / to field ready (1 * 2) should fail", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -625,7 +628,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field cloudProviderTarget (7) / operator > / to field ready (1) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -641,7 +644,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field cloudProviderTarget (7 * 2) / operator > / to field ready (1 * 2) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: 
corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -657,7 +660,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field cloudProviderTarget (7) / operator != / to field ready (1) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -673,7 +676,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field cloudProviderTarget (7 * 2) / operator != / to field ready (1 * 2) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldCloudProviderTarget), @@ -689,7 +692,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 1 nodegroup, from field cloudProviderTarget (7) / operator < / to field ready (1) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldLongUnregistered), @@ -705,7 +708,7 @@ func Test_matchPolicy(t *testing.T) { { name: "with 2 nodegroups, from field cloudProviderTarget (7 * 2) / operator < / to field ready (1 * 2) should succeed", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, policy: corev1alpha1.SchedulerPolicy{ LeftOperand: corev1alpha1.IntOrField{ FieldVal: fieldPointer(corev1alpha1.FieldLongUnregistered), @@ -730,7 +733,7 @@ func Test_matchPolicy(t *testing.T) { func Test_nodeGroupReplicas(t *testing.T) { type args struct { - ngs []clusterautoscaler.NodeGroup + ngs []clusterautoscaler.NodeGroupStatus operation corev1alpha1.IntOrArithmeticOperation } tests := []struct { @@ -741,7 +744,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, no operation should return int value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 3, OperationVal: nil, @@ -752,7 +755,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, no operation should return int value", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 3, OperationVal: nil, @@ -763,7 +766,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, mixed operands / plus operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ // Operation has higher priority than int value IntVal: 12, @@ -783,7 +786,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, mixed operands / plus operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ // Operation has higher priority than int value IntVal: 12, @@ -803,7 +806,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, mixed operands / minus operation should 
work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -822,7 +825,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, mixed operands / minus operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -841,7 +844,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, mixed operands / multiply operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -860,7 +863,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, mixed operands / multiply operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -879,7 +882,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, mixed operands / divide operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -888,17 +891,17 @@ func Test_nodeGroupReplicas(t *testing.T) { }, Operator: corev1alpha1.ArithmeticOperatorDivide, RightOperand: corev1alpha1.IntOrField{ - FieldVal: fieldPointer(corev1alpha1.FieldLongNotStarted), + FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), }, }, }, }, - want: 3, + want: 4, }, { name: "with 2 nodegroups, mixed operands / divide operation should work", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -907,17 +910,17 @@ func Test_nodeGroupReplicas(t *testing.T) { }, Operator: corev1alpha1.ArithmeticOperatorDivide, RightOperand: corev1alpha1.IntOrField{ - FieldVal: fieldPointer(corev1alpha1.FieldLongNotStarted), + FieldVal: fieldPointer(corev1alpha1.FieldNotStarted), }, }, }, }, - want: 1, + want: 2, }, { name: "with 1 nodegroup, negative result should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -936,7 +939,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, negative result should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -955,7 +958,7 @@ func Test_nodeGroupReplicas(t *testing.T) { { name: "with 1 nodegroup, zero division should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ @@ -974,7 +977,7 @@ func 
Test_nodeGroupReplicas(t *testing.T) { { name: "with 2 nodegroups, zero division should return zero", args: args{ - ngs: []clusterautoscaler.NodeGroup{ng, ng}, + ngs: []clusterautoscaler.NodeGroupStatus{ng, ng}, operation: corev1alpha1.IntOrArithmeticOperation{ IntVal: 0, OperationVal: &corev1alpha1.ArithmeticOperation{ diff --git a/go.mod b/go.mod index 831b421..cfcb7c5 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/gomega v1.34.1 github.com/prometheus/client_golang v1.19.1 go.uber.org/zap v1.27.0 + gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.30.3 k8s.io/apimachinery v0.30.3 k8s.io/client-go v0.30.3 @@ -73,7 +74,6 @@ require ( google.golang.org/protobuf v1.34.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.30.1 // indirect k8s.io/klog/v2 v2.120.1 // indirect diff --git a/go.sum b/go.sum index 29aeeb2..eb03b54 100644 --- a/go.sum +++ b/go.sum @@ -261,18 +261,12 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.30.1 h1:kCm/6mADMdbAxmIh0LBjS54nQBE+U4KmbCfIkF5CpJY= -k8s.io/api v0.30.1/go.mod h1:ddbN2C0+0DIiPntan/bye3SW3PdwLa11/0yqwvuRrJM= k8s.io/api v0.30.3 h1:ImHwK9DCsPA9uoU3rVh4QHAHHK5dTSv1nxJUapx8hoQ= k8s.io/api v0.30.3/go.mod h1:GPc8jlzoe5JG3pb0KJCSLX5oAFIW3/qNJITlDj8BH04= k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= -k8s.io/apimachinery v0.30.1 h1:ZQStsEfo4n65yAdlGTfP/uSHMQSoYzU/oeEbkmF7P2U= -k8s.io/apimachinery v0.30.1/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= k8s.io/apimachinery v0.30.3 h1:q1laaWCmrszyQuSQCfNB8cFgCuDAoPszKY4ucAjDwHc= k8s.io/apimachinery v0.30.3/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= -k8s.io/client-go v0.30.1 h1:uC/Ir6A3R46wdkgCV3vbLyNOYyCJ8oZnjtJGKfytl/Q= -k8s.io/client-go v0.30.1/go.mod h1:wrAqLNs2trwiCH/wxxmT/x3hKVH9PuV0GGW0oDoHVqc= k8s.io/client-go v0.30.3 h1:bHrJu3xQZNXIi8/MoxYtZBBWQQXwy16zqJwloXXfD3k= k8s.io/client-go v0.30.3/go.mod h1:8d4pf8vYu665/kUbsxWAQ/JDBNWqfFeZnvFiVdmx89U= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= diff --git a/helm/kubestitute/Chart.yaml b/helm/kubestitute/Chart.yaml index cdf450e..1135c7b 100644 --- a/helm/kubestitute/Chart.yaml +++ b/helm/kubestitute/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: kubestitute type: application -version: 3.0.0 -appVersion: "2.2.0" +version: 3.1.0-rc.0 +appVersion: "2.3.0-rc.0" description: Kubestitute is an event based instances lifecycle manager for Kubernetes. 
home: https://github.com/quortex/kubestitute sources: diff --git a/helm/kubestitute/README.md b/helm/kubestitute/README.md index fed2f49..85855ba 100644 --- a/helm/kubestitute/README.md +++ b/helm/kubestitute/README.md @@ -1,6 +1,6 @@ # kubestitute -![Version: 3.0.0](https://img.shields.io/badge/Version-3.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.2.0](https://img.shields.io/badge/AppVersion-2.2.0-informational?style=flat-square) +![Version: 3.1.0-rc.0](https://img.shields.io/badge/Version-3.1.0--rc.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.3.0-rc.0](https://img.shields.io/badge/AppVersion-2.3.0--rc.0-informational?style=flat-square) Kubestitute is an event based instances lifecycle manager for Kubernetes. @@ -72,6 +72,7 @@ helm install kubestitute kubestitute/kubestitute -n kubestitute-system | aws.secretAccessKey | string | `""` | The secret access key of a user with necessary permissions. | | manager.clusterAutoscaler.namespace | string | `"kube-system"` | The Cluster Autoscaler namespace. | | manager.clusterAutoscaler.name | string | `"cluster-autoscaler-status"` | The Cluster Autoscaler status configmap name. | +| manager.clusterAutoscaler.legacyFormat | bool | `false` | Whether to parse the Cluster Autoscaler status configmap in legacy format, used by cluster-autoscaler up to version 1.29. | | manager.priorityExpander.enabled | bool | `false` | | | manager.priorityExpander.name | string | `"priority-expander-default"` | Name of the Priority Expander object. | | manager.priorityExpander.namespace | string | `"kubestitute-system"` | Namespace of the Priority Expander object. | diff --git a/helm/kubestitute/templates/deployment.yaml b/helm/kubestitute/templates/deployment.yaml index ecbbb06..45b9e69 100644 --- a/helm/kubestitute/templates/deployment.yaml +++ b/helm/kubestitute/templates/deployment.yaml @@ -42,6 +42,9 @@ spec: - --leader-elect - --clusterautoscaler-namespace={{ .clusterAutoscaler.namespace }} - --clusterautoscaler-status-name={{ .clusterAutoscaler.name }} + {{- if .clusterAutoscaler.legacyFormat }} + - --clusterautoscaler-status-legacy-format + {{- end }} {{- if .priorityExpander.enabled }} - --priority-expander-namespace={{ .priorityExpander.namespace }} - --priority-expander-name={{ .priorityExpander.name }} diff --git a/helm/kubestitute/values.yaml b/helm/kubestitute/values.yaml index 13e928a..442ba69 100644 --- a/helm/kubestitute/values.yaml +++ b/helm/kubestitute/values.yaml @@ -16,6 +16,8 @@ manager: namespace: kube-system # -- The Cluster Autoscaler status configmap name. name: cluster-autoscaler-status + # -- Whether to parse the Cluster Autoscaler status configmap in legacy format, used by cluster-autoscaler up to version 1.29. 
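  # Illustrative example (not part of the chart defaults): clusters still running
  # cluster-autoscaler <= 1.29 keep the readable status format and would set
  # manager.clusterAutoscaler.legacyFormat=true, which the deployment template
  # above turns into the --clusterautoscaler-status-legacy-format argument.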
+ legacyFormat: false priorityExpander: enabled: false diff --git a/main.go b/main.go index ae63c08..faf11d0 100644 --- a/main.go +++ b/main.go @@ -62,6 +62,7 @@ func main() { var clusterAutoscalerNamespace string var clusterAutoscalerStatusName string + var clusterAutoscalerStatusLegacyFormat bool var clusterAutoscalerPEConfigMapName string var priorityExpanderNamespace string var priorityExpanderName string @@ -81,6 +82,7 @@ func main() { flag.StringVar(&clusterAutoscalerNamespace, "clusterautoscaler-namespace", "kube-system", "The namespace the clusterautoscaler belongs to.") flag.StringVar(&clusterAutoscalerStatusName, "clusterautoscaler-status-name", "cluster-autoscaler-status", "The name of the clusterautoscaler status configmap.") + flag.BoolVar(&clusterAutoscalerStatusLegacyFormat, "clusterautoscaler-status-legacy-format", false, "Set if the clusterautoscaler status configmap is formatted the legacy readable format, used by cluster-autoscaler up to version 1.29.") flag.StringVar(&clusterAutoscalerPEConfigMapName, "cluster-autoscaler-priority-expander-config-map", "cluster-autoscaler-priority-expander", "The name of the clusterautoscaler priority expander config map.") flag.StringVar(&priorityExpanderNamespace, "priority-expander-namespace", "kubestitute-system", "The namespace the _unique_ priority expander object belongs to.") flag.StringVar(&priorityExpanderName, "priority-expander-name", "priority-expander-default", "The only accepted name for the priority expander object.") @@ -105,9 +107,9 @@ func main() { autoscaling := autoscaling.New(session) mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ - Scheme: scheme, + Scheme: scheme, Metrics: metricsserver.Options{ - BindAddress: metricsAddr, + BindAddress: metricsAddr, }, WebhookServer: webhook.NewServer(webhook.Options{}), HealthProbeBindAddress: probeAddr, @@ -142,8 +144,9 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Configuration: controllers.SchedulerReconcilerConfiguration{ - ClusterAutoscalerNamespace: clusterAutoscalerNamespace, - ClusterAutoscalerStatusName: clusterAutoscalerStatusName, + ClusterAutoscalerNamespace: clusterAutoscalerNamespace, + ClusterAutoscalerStatusName: clusterAutoscalerStatusName, + ClusterAutoscalerStatusLegacyFormat: clusterAutoscalerStatusLegacyFormat, }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Scheduler") @@ -162,11 +165,12 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Configuration: controllers.PriorityExpanderReconcilerConfiguration{ - ClusterAutoscalerNamespace: clusterAutoscalerNamespace, - ClusterAutoscalerStatusName: clusterAutoscalerStatusName, - ClusterAutoscalerPEConfigMapName: clusterAutoscalerPEConfigMapName, - PriorityExpanderNamespace: priorityExpanderNamespace, - PriorityExpanderName: priorityExpanderName, + ClusterAutoscalerNamespace: clusterAutoscalerNamespace, + ClusterAutoscalerStatusName: clusterAutoscalerStatusName, + ClusterAutoscalerStatusLegacyFormat: clusterAutoscalerStatusLegacyFormat, + ClusterAutoscalerPEConfigMapName: clusterAutoscalerPEConfigMapName, + PriorityExpanderNamespace: priorityExpanderNamespace, + PriorityExpanderName: priorityExpanderName, }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PriorityExpander") diff --git a/utils/clusterautoscaler/parser.go b/utils/clusterautoscaler/parser.go index b8569e6..a30bc06 100644 --- a/utils/clusterautoscaler/parser.go +++ 
b/utils/clusterautoscaler/parser.go
@@ -2,13 +2,109 @@ package clusterautoscaler
 import (
 	"bufio"
+	"fmt"
 	"reflect"
 	"regexp"
 	"strconv"
 	"strings"
 	"time"
+
+	"gopkg.in/yaml.v2"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
+// ParseYamlStatus parses the cluster autoscaler status
+// in yaml format into a ClusterAutoscalerStatus struct.
+func ParseYamlStatus(s string) (*ClusterAutoscalerStatus, error) {
+	var res ClusterAutoscalerStatus
+	if err := yaml.Unmarshal([]byte(s), &res); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal status: %v", err)
+	}
+
+	return &res, nil
+}
+
+func convertToClusterWideStatus(status Status) *ClusterAutoscalerStatus {
+	res := ClusterAutoscalerStatus{
+		Time: status.Time.Format(configMapLastUpdateFormat),
+		ClusterWide: ClusterWideStatus{
+			Health: ClusterHealthCondition{
+				Status: ClusterAutoscalerConditionStatus(status.ClusterWide.Health.Status),
+				NodeCounts: NodeCount{
+					Registered: RegisteredNodeCount{
+						Total: int(status.ClusterWide.Health.Registered),
+						Ready: int(status.ClusterWide.Health.Ready),
+						NotStarted: int(status.ClusterWide.Health.NotStarted),
+						BeingDeleted: 0, // Not present in the old status format
+						Unready: RegisteredUnreadyNodeCount{
+							Total: int(status.ClusterWide.Health.Unready),
+							ResourceUnready: 0, // Present but not parsed in the old configmap
+						},
+					},
+					LongUnregistered: int(status.ClusterWide.Health.LongUnregistered),
+					Unregistered: 0, // Not present in the old status format
+				},
+				LastProbeTime: metav1.NewTime(status.ClusterWide.Health.LastProbeTime),
+				LastTransitionTime: metav1.NewTime(status.ClusterWide.Health.LastTransitionTime),
+			},
+			ScaleUp: ClusterScaleUpCondition{
+				Status: ClusterAutoscalerConditionStatus(status.ClusterWide.ScaleUp.Status),
+				LastProbeTime: metav1.NewTime(status.ClusterWide.ScaleUp.LastProbeTime),
+				LastTransitionTime: metav1.NewTime(status.ClusterWide.ScaleUp.LastTransitionTime),
+			},
+			ScaleDown: ScaleDownCondition{
+				Status: ClusterAutoscalerConditionStatus(status.ClusterWide.ScaleDown.Status),
+				Candidates: int(status.ClusterWide.ScaleDown.Candidates),
+				LastProbeTime: metav1.NewTime(status.ClusterWide.ScaleDown.LastProbeTime),
+				LastTransitionTime: metav1.NewTime(status.ClusterWide.ScaleDown.LastTransitionTime),
+			},
+		},
+		NodeGroups: make([]NodeGroupStatus, len(status.NodeGroups)),
+	}
+
+	for i := range status.NodeGroups {
+		res.NodeGroups[i] = NodeGroupStatus{
+			Name: status.NodeGroups[i].Name,
+			Health: NodeGroupHealthCondition{
+				Status: ClusterAutoscalerConditionStatus(status.NodeGroups[i].Health.Status),
+				NodeCounts: NodeCount{
+					Registered: RegisteredNodeCount{
+						Total: int(status.NodeGroups[i].Health.Registered),
+						Ready: int(status.NodeGroups[i].Health.Ready),
+						NotStarted: int(status.NodeGroups[i].Health.NotStarted),
+						BeingDeleted: 0, // Not present in the old status format
+						Unready: RegisteredUnreadyNodeCount{
+							Total: int(status.NodeGroups[i].Health.Unready),
+							ResourceUnready: 0, // Present but not parsed in the old configmap
+						},
+					},
+					LongUnregistered: int(status.NodeGroups[i].Health.LongUnregistered),
+					Unregistered: 0, // Not present in the old status format
+				},
+				CloudProviderTarget: int(status.NodeGroups[i].Health.CloudProviderTarget),
+				MinSize: int(status.NodeGroups[i].Health.MinSize),
+				MaxSize: int(status.NodeGroups[i].Health.MaxSize),
+				LastProbeTime: metav1.NewTime(status.NodeGroups[i].Health.LastProbeTime),
+				LastTransitionTime: metav1.NewTime(status.NodeGroups[i].Health.LastTransitionTime),
+			},
+			ScaleUp: NodeGroupScaleUpCondition{
Status: ClusterAutoscalerConditionStatus(status.NodeGroups[i].ScaleUp.Status), + BackoffInfo: BackoffInfo{}, // Not present in the old status format + LastProbeTime: metav1.NewTime(status.NodeGroups[i].ScaleUp.LastProbeTime), + LastTransitionTime: metav1.NewTime(status.NodeGroups[i].ScaleUp.LastTransitionTime), + }, + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerConditionStatus(status.NodeGroups[i].ScaleDown.Status), + Candidates: int(status.NodeGroups[i].ScaleDown.Candidates), + LastProbeTime: metav1.NewTime(status.NodeGroups[i].ScaleDown.LastProbeTime), + LastTransitionTime: metav1.NewTime(status.NodeGroups[i].ScaleDown.LastTransitionTime), + }, + } + } + + return &res +} + const ( // configMapLastUpdateFormat it the timestamp format used for last update annotation in status ConfigMap configMapLastUpdateFormat = "2006-01-02 15:04:05.999999999 -0700 MST" @@ -27,7 +123,6 @@ var ( regexHealthReady = regexp.MustCompile(`[\( ]ready=(\d*)`) regexHealthUnready = regexp.MustCompile(`[\( ]unready=(\d*)`) regexHealthNotStarted = regexp.MustCompile(`[\( ]notStarted=(\d*)`) - regexHealthLongNotStarted = regexp.MustCompile(`[\( ]longNotStarted=(\d*)`) regexHealthRegistered = regexp.MustCompile(`[\( ]registered=(\d*)`) regexHealthLongUnregistered = regexp.MustCompile(`[\( ]longUnregistered=(\d*)`) regexHealthCloudProviderTarget = regexp.MustCompile(`[\( ]cloudProviderTarget=(\d*)`) @@ -39,14 +134,13 @@ var ( regexDate = regexp.MustCompile(`(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(.\d*)? \+\d* [A-Z]*)`) ) -// ParseReadableString parses the cluster autoscaler status +// ParseReadableStatus parses the cluster autoscaler status // in readable format into a ClusterAutoscaler Status struct. -func ParseReadableString(s string) *Status { - +func ParseReadableStatus(s string) *ClusterAutoscalerStatus { var currentMajor interface{} var currentMinor interface{} - res := &Status{} + res := Status{} scanner := bufio.NewScanner(strings.NewReader(s)) for scanner.Scan() { @@ -158,7 +252,7 @@ func ParseReadableString(s string) *Status { } } - return res + return convertToClusterWideStatus(res) } // parseHealthStatus extract HealthStatus from readable string @@ -180,7 +274,6 @@ func parseHealth(s string) Health { Ready: parseToInt32(regexHealthReady, s), Unready: parseToInt32(regexHealthUnready, s), NotStarted: parseToInt32(regexHealthNotStarted, s), - LongNotStarted: parseToInt32(regexHealthLongNotStarted, s), Registered: parseToInt32(regexHealthRegistered, s), LongUnregistered: parseToInt32(regexHealthLongUnregistered, s), } diff --git a/utils/clusterautoscaler/parser_test.go b/utils/clusterautoscaler/parser_test.go index f791803..d0c347b 100644 --- a/utils/clusterautoscaler/parser_test.go +++ b/utils/clusterautoscaler/parser_test.go @@ -5,12 +5,241 @@ import ( "time" "github.com/go-test/deep" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const status = ` +var lastProbingTime = metav1.NewTime(time.Date(2020, time.November, 25, 8, 19, 44, 88071148, time.UTC)) + +const yamlStatus = ` +time: 2020-11-25 08:19:44.090873082 +0000 UTC +autoscalerStatus: Running +clusterWide: + health: + status: Healthy + nodeCounts: + registered: + total: 5 + ready: 4 + notStarted: 1 + unready: + total: 2 + resourceUnready: 0 + longUnregistered: 5 + unregistered: 6 + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T07:46:04.409158551Z" + scaleUp: + status: InProgress + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:18:33.613103712Z" + scaleDown: + status: 
CandidatesPresent + candidates: 1 + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:19:34.073648791Z" +nodeGroups: +- name: foo + health: + status: Healthy + nodeCounts: + registered: + total: 5 + ready: 1 + notStarted: 3 + unready: + total: 2 + resourceUnready: 0 + longUnregistered: 6 + unregistered: 7 + cloudProviderTarget: 2 + minSize: 1 + maxSize: 3 + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T07:46:04.409158551Z" + scaleUp: + status: InProgress + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:18:33.613103712Z" + scaleDown: + status: CandidatesPresent + candidates: 1 + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:19:34.073648791Z" +- name: bar + health: + status: Healthy + nodeCounts: + registered: + total: 2 + ready: 2 + notStarted: 2 + unready: + total: 1 + resourceUnready: 0 + longUnregistered: 4 + unregistered: 0 + cloudProviderTarget: 2 + minSize: 0 + maxSize: 3 + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "0001-01-01T00:00:00Z" + scaleUp: + status: NoActivity + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:14:42.467240558Z" + scaleDown: + status: NoCandidates + lastProbeTime: "2020-11-25T08:19:44.088071148Z" + lastTransitionTime: "2020-11-25T08:14:52.480583803Z" +` + +func TestParseYamlStatus(t *testing.T) { + type args struct { + s string + } + tests := []struct { + name string + args args + want *ClusterAutoscalerStatus + wantErr bool + }{ + { + name: "a fully functional status", + args: args{ + s: yamlStatus, + }, + want: &ClusterAutoscalerStatus{ + Time: "2020-11-25 08:19:44.090873082 +0000 UTC", + AutoscalerStatus: ClusterAutoscalerRunning, + ClusterWide: ClusterWideStatus{ + Health: ClusterHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 5, + Ready: 4, + Unready: RegisteredUnreadyNodeCount{ + Total: 2, + ResourceUnready: 0, + }, + NotStarted: 1, + BeingDeleted: 0, + }, + LongUnregistered: 5, + Unregistered: 6, + }, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 7, 46, 0o4, 409158551, time.UTC), + }, + ScaleUp: ClusterScaleUpCondition{ + Status: ClusterAutoscalerInProgress, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC), + }, + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerCandidatesPresent, + Candidates: 1, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC), + }, + }, + NodeGroups: []NodeGroupStatus{ + { + Name: "foo", + Health: NodeGroupHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 5, + Ready: 1, + Unready: RegisteredUnreadyNodeCount{ + Total: 2, + ResourceUnready: 0, + }, + NotStarted: 3, + BeingDeleted: 0, + }, + LongUnregistered: 6, + Unregistered: 7, + }, + CloudProviderTarget: 2, + MinSize: 1, + MaxSize: 3, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 7, 46, 4, 409158551, time.UTC), + }, + ScaleUp: NodeGroupScaleUpCondition{ + Status: ClusterAutoscalerInProgress, + BackoffInfo: BackoffInfo{}, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC), + }, + ScaleDown: ScaleDownCondition{ 
+ Status: ClusterAutoscalerCandidatesPresent, + Candidates: 1, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC), + }, + }, + { + Name: "bar", + Health: NodeGroupHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 2, + Ready: 2, + Unready: RegisteredUnreadyNodeCount{ + Total: 1, + ResourceUnready: 0, + }, + NotStarted: 2, + BeingDeleted: 0, + }, + LongUnregistered: 4, + Unregistered: 0, + }, + CloudProviderTarget: 2, + MinSize: 0, + MaxSize: 3, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Time{}, + }, + ScaleUp: NodeGroupScaleUpCondition{ + Status: ClusterAutoscalerNoActivity, + BackoffInfo: BackoffInfo{}, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 14, 42, 467240558, time.UTC), + }, + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerNoCandidates, + Candidates: 0, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.Date(2020, time.November, 25, 8, 14, 52, 480583803, time.UTC), + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseYamlStatus(tt.args.s) + if (err != nil) != tt.wantErr { + t.Errorf("ParseYamlStatus() error = %v, wantErr %v", err, tt.wantErr) + return + } + if diff := deep.Equal(got, tt.want); diff != nil { + t.Error(diff) + } + }) + } +} + +const readableStatus = ` Cluster-autoscaler status at 2020-11-25 08:19:44.090873082 +0000 UTC: Cluster-wide: - Health: Healthy (ready=4 unready=2 notStarted=1 longNotStarted=3 registered=5 longUnregistered=5) + Health: Healthy (ready=4 unready=2 notStarted=1 longNotStarted=0 registered=5 longUnregistered=5) LastProbeTime: 2020-11-25 08:19:44.088071148 +0000 UTC m=+2030.020714775 LastTransitionTime: 2020-11-25 07:46:04.409158551 +0000 UTC m=+10.341802256 ScaleUp: InProgress (ready=4 registered=5) @@ -22,7 +251,7 @@ Cluster-wide: NodeGroups: Name: foo - Health: Healthy (ready=1 unready=2 notStarted=3 longNotStarted=4 registered=5 longUnregistered=6 cloudProviderTarget=2 (minSize=1, maxSize=3)) + Health: Healthy (ready=1 unready=2 notStarted=3 longNotStarted=0 registered=5 longUnregistered=6 cloudProviderTarget=2 (minSize=1, maxSize=3)) LastProbeTime: 2020-11-25 08:19:44.088071148 +0000 UTC m=+2030.020714775 LastTransitionTime: 2020-11-25 07:46:04.409158551 +0000 UTC m=+10.341802256 ScaleUp: InProgress (ready=1 cloudProviderTarget=2) @@ -33,7 +262,7 @@ NodeGroups: LastTransitionTime: 2020-11-25 08:19:34.073648791 +0000 UTC m=+2020.006292413 Name: bar - Health: Healthy (ready=2 unready=1 notStarted=2 longNotStarted=3 registered=2 longUnregistered=4 cloudProviderTarget=2 (minSize=0, maxSize=3)) + Health: Healthy (ready=2 unready=1 notStarted=2 longNotStarted=0 registered=2 longUnregistered=4 cloudProviderTarget=2 (minSize=0, maxSize=3)) LastProbeTime: 2020-11-25 08:19:44.088071148 +0000 UTC m=+2030.020714775 LastTransitionTime: 0001-01-01 00:00:00 +0000 UTC ScaleUp: NoActivity (ready=2 cloudProviderTarget=2) @@ -44,105 +273,128 @@ NodeGroups: LastTransitionTime: 2020-11-25 08:14:52.480583803 +0000 UTC m=+1738.413227454 ` -var lpt = time.Date(2020, time.November, 25, 8, 19, 44, 88071148, time.UTC) - -func TestParseReadableString(t *testing.T) { +func TestParseReadableStatus(t *testing.T) { type args struct { s string } tests := []struct { name string args args - want *Status + want *ClusterAutoscalerStatus }{ { name: "a fully functional status", args: 
args{ - s: status, + s: readableStatus, }, - want: &Status{ - Time: time.Date(2020, time.November, 25, 8, 19, 44, 90873082, time.UTC), - ClusterWide: ClusterWide{ - Health: Health{ - Status: HealthStatusHealthy, - Ready: 4, - Unready: 2, - NotStarted: 1, - LongNotStarted: 3, - Registered: 5, - LongUnregistered: 5, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 7, 46, 04, 409158551, time.UTC), + want: &ClusterAutoscalerStatus{ + Time: "2020-11-25 08:19:44.090873082 +0000 UTC", + AutoscalerStatus: "", // Present in readable status but not parsed + ClusterWide: ClusterWideStatus{ + Health: ClusterHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 5, + Ready: 4, + Unready: RegisteredUnreadyNodeCount{ + Total: 2, + ResourceUnready: 0, + }, + NotStarted: 1, + BeingDeleted: 0, + }, + LongUnregistered: 5, + Unregistered: 0, // Not present in readable status + }, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 7, 46, 0o4, 409158551, time.UTC)), }, - ScaleDown: ScaleDown{ - Status: ScaleDownCandidatesPresent, - Candidates: 1, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC), + ScaleUp: ClusterScaleUpCondition{ + Status: ClusterAutoscalerInProgress, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC)), }, - ScaleUp: ScaleUp{ - Status: ScaleUpInProgress, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC), + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerCandidatesPresent, + Candidates: 1, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC)), }, }, - NodeGroups: []NodeGroup{ + NodeGroups: []NodeGroupStatus{ { Name: "foo", - Health: NodeGroupHealth{ - Health: Health{ - Status: HealthStatusHealthy, - Ready: 1, - Unready: 2, - NotStarted: 3, - LongNotStarted: 4, - Registered: 5, - LongUnregistered: 6, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 7, 46, 4, 409158551, time.UTC), + Health: NodeGroupHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 5, + Ready: 1, + Unready: RegisteredUnreadyNodeCount{ + Total: 2, + ResourceUnready: 0, + }, + NotStarted: 3, + BeingDeleted: 0, + }, + LongUnregistered: 6, + Unregistered: 0, // Not present in readable status }, CloudProviderTarget: 2, MinSize: 1, MaxSize: 3, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 7, 46, 4, 409158551, time.UTC)), }, - ScaleDown: ScaleDown{ - Status: ScaleDownCandidatesPresent, - Candidates: 1, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC), + ScaleUp: NodeGroupScaleUpCondition{ + Status: ClusterAutoscalerInProgress, + BackoffInfo: BackoffInfo{}, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC)), }, - ScaleUp: ScaleUp{ - Status: ScaleUpInProgress, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 18, 33, 613103712, time.UTC), + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerCandidatesPresent, + Candidates: 1, + LastProbeTime: 
lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 19, 34, 73648791, time.UTC)), }, }, { Name: "bar", - Health: NodeGroupHealth{ - Health: Health{ - Status: HealthStatusHealthy, - Ready: 2, - Unready: 1, - NotStarted: 2, - LongNotStarted: 3, - Registered: 2, - LongUnregistered: 4, - LastProbeTime: lpt, - LastTransitionTime: time.Time{}}, + Health: NodeGroupHealthCondition{ + Status: ClusterAutoscalerHealthy, + NodeCounts: NodeCount{ + Registered: RegisteredNodeCount{ + Total: 2, + Ready: 2, + Unready: RegisteredUnreadyNodeCount{ + Total: 1, + ResourceUnready: 0, + }, + NotStarted: 2, + BeingDeleted: 0, + }, + LongUnregistered: 4, + Unregistered: 0, // Not present in readable status + }, CloudProviderTarget: 2, MinSize: 0, MaxSize: 3, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Time{}), }, - ScaleDown: ScaleDown{ - Status: ScaleDownNoCandidates, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 14, 52, 480583803, time.UTC), + ScaleUp: NodeGroupScaleUpCondition{ + Status: ClusterAutoscalerNoActivity, + BackoffInfo: BackoffInfo{}, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 14, 42, 467240558, time.UTC)), }, - ScaleUp: ScaleUp{ - Status: ScaleUpNoActivity, - LastProbeTime: lpt, - LastTransitionTime: time.Date(2020, time.November, 25, 8, 14, 42, 467240558, time.UTC), + ScaleDown: ScaleDownCondition{ + Status: ClusterAutoscalerNoCandidates, + Candidates: 0, + LastProbeTime: lastProbingTime, + LastTransitionTime: metav1.NewTime(time.Date(2020, time.November, 25, 8, 14, 52, 480583803, time.UTC)), }, }, }, @@ -151,7 +403,7 @@ func TestParseReadableString(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := ParseReadableString(tt.args.s) + got := ParseReadableStatus(tt.args.s) if diff := deep.Equal(got, tt.want); diff != nil { t.Error(diff) } diff --git a/utils/clusterautoscaler/status.go b/utils/clusterautoscaler/readablestatus.go similarity index 66% rename from utils/clusterautoscaler/status.go rename to utils/clusterautoscaler/readablestatus.go index 6148804..85d9d9b 100644 --- a/utils/clusterautoscaler/status.go +++ b/utils/clusterautoscaler/readablestatus.go @@ -4,7 +4,8 @@ import ( "time" ) -// Status contains ClusterAutoscaler status. +// Status contains ClusterAutoscaler Status. +// Deprecated: Use ClusterAutoscalerStatus instead. type Status struct { Time time.Time ClusterWide ClusterWide @@ -13,6 +14,7 @@ type Status struct { // ClusterWide is the global (cluster wide ) // ClusterAutoscaler status. +// Deprecated: Use ClusterWideStatus instead. type ClusterWide struct { Health Health ScaleDown ScaleDown @@ -21,6 +23,7 @@ type ClusterWide struct { // NodeGroup is the ClusterAutoscaler status // by node group. +// Deprecated: Use NodeGroupStatus instead. type NodeGroup struct { Name string Health NodeGroupHealth @@ -30,26 +33,31 @@ type NodeGroup struct { // HealthStatus describes ClusterAutoscaler status // for Node groups Healthness. +// Deprecated: Use ClusterHealthCondition instead. type HealthStatus string const ( // HealthStatusHealthy status means that the cluster is in a good shape. + // Deprecated: Use ClusterAutoscalerHealthy instead. HealthStatusHealthy HealthStatus = "Healthy" // HealthStatusUnhealthy status means that the cluster is in a bad shape. + // Deprecated: Use ClusterAutoscalerUnhealthy instead. 
HealthStatusUnhealthy HealthStatus = "Unhealthy" ) // Health describes the cluster wide cluster autoscaler // Health condition. +// Deprecated: Use ClusterHealthCondition instead. type Health struct { - Status HealthStatus - Ready, Unready, NotStarted, LongNotStarted, Registered, LongUnregistered int32 - LastProbeTime time.Time - LastTransitionTime time.Time + Status HealthStatus + Ready, Unready, NotStarted, Registered, LongUnregistered int32 + LastProbeTime time.Time + LastTransitionTime time.Time } // NodeGroupHealth describes the individual node group cluster autoscaler // Health condition. +// Deprecated: Use NodeGroupHealthCondition instead. type NodeGroupHealth struct { Health CloudProviderTarget, MinSize, MaxSize int32 @@ -57,17 +65,21 @@ type NodeGroupHealth struct { // ScaleDownStatus describes ClusterAutoscaler status // for Node groups ScaleDown. +// Deprecated: Use ClusterAutoscalerConditionStatus instead. type ScaleDownStatus string const ( // ScaleDownCandidatesPresent status means that there's candidates for scale down. + // Deprecated: Use ClusterAutoscalerCandidatesPresent instead. ScaleDownCandidatesPresent ScaleDownStatus = "CandidatesPresent" // ScaleDownNoCandidates status means that there's no candidates for scale down. + // Deprecated: Use ClusterAutoscalerNoCandidates instead. ScaleDownNoCandidates ScaleDownStatus = "NoCandidates" ) // ScaleDown describes ClusterAutoscaler condition // for Node groups ScaleDown. +// Deprecated: Use ScaleDownCondition instead. type ScaleDown struct { Status ScaleDownStatus Candidates int32 @@ -77,23 +89,30 @@ type ScaleDown struct { // ScaleUpStatus describes ClusterAutoscaler status // for Node groups ScaleUp. +// Deprecated: Use ClusterAutoscalerConditionStatus instead. type ScaleUpStatus string const ( // ScaleUpNeeded status means that scale up is needed. + // Deprecated: Use ClusterAutoscalerNeeded instead. ScaleUpNeeded ScaleUpStatus = "Needed" // ScaleUpNotNeeded status means that scale up is not needed. + // Deprecated: Use ClusterAutoscalerNotNeeded instead. ScaleUpNotNeeded ScaleUpStatus = "NotNeeded" // ScaleUpInProgress status means that scale up is in progress. + // Deprecated: Use ClusterAutoscalerInProgress instead. ScaleUpInProgress ScaleUpStatus = "InProgress" // ScaleUpNoActivity status means that there has been no scale up activity recently. + // Deprecated: Use ClusterAutoscalerNoActivity instead. ScaleUpNoActivity ScaleUpStatus = "NoActivity" // ScaleUpBackoff status means that due to a recently failed scale-up no further scale-ups attempts will be made for some time. + // Deprecated: Use ClusterAutoscalerBackoff instead. ScaleUpBackoff ScaleUpStatus = "Backoff" ) // ScaleUp describes ClusterAutoscaler condition // for Node groups ScaleUp. +// Deprecated: Use ClusterScaleUpCondition or NodeGroupScaleUpCondition instead. type ScaleUp struct { Status ScaleUpStatus LastProbeTime time.Time @@ -101,7 +120,7 @@ type ScaleUp struct { } // GetNodeGroupWithName returns the NodeGroup in slice matching name. -func GetNodeGroupWithName(nodeGroups []NodeGroup, name string) *NodeGroup { +func GetNodeGroupWithName(nodeGroups []NodeGroupStatus, name string) *NodeGroupStatus { for _, e := range nodeGroups { if e.Name == name { return &e diff --git a/utils/clusterautoscaler/yamlstatus.go b/utils/clusterautoscaler/yamlstatus.go new file mode 100644 index 0000000..852b897 --- /dev/null +++ b/utils/clusterautoscaler/yamlstatus.go @@ -0,0 +1,199 @@ +/* +Copyright 2017 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Content retrieved from cluster-autoscaler version 1.30.2: +https://github.com/kubernetes/autoscaler/blob/cluster-autoscaler-1.30.2/cluster-autoscaler/clusterstate/api/types.go +*/ + +package clusterautoscaler + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ClusterAutoscalerStatusCondition is the status of the cluster autoscaler. +type ClusterAutoscalerStatusCondition string + +const ( + // ClusterAutoscalerRunning status means that the cluster autoscaler has been initialized and running. + ClusterAutoscalerRunning ClusterAutoscalerStatusCondition = "Running" + // ClusterAutoscalerInitializing status means that cluster autoscaler is currently being initialized. + ClusterAutoscalerInitializing ClusterAutoscalerStatusCondition = "Initializing" +) + +// ClusterAutoscalerConditionStatus is a status of ClusterAutoscalerCondition. +type ClusterAutoscalerConditionStatus string + +const ( + // Statuses for Health condition type. + + // ClusterAutoscalerHealthy status means that the cluster is in a good shape. + ClusterAutoscalerHealthy ClusterAutoscalerConditionStatus = "Healthy" + // ClusterAutoscalerUnhealthy status means that the cluster is in a bad shape. + ClusterAutoscalerUnhealthy ClusterAutoscalerConditionStatus = "Unhealthy" + + // Statuses for ScaleDown condition type. + + // ClusterAutoscalerCandidatesPresent status means that there are candidates for scale down. + ClusterAutoscalerCandidatesPresent ClusterAutoscalerConditionStatus = "CandidatesPresent" + // ClusterAutoscalerNoCandidates status means that there are no candidates for scale down. + ClusterAutoscalerNoCandidates ClusterAutoscalerConditionStatus = "NoCandidates" + + // Statuses for ScaleUp condition type. + + // ClusterAutoscalerNeeded status means that scale up is needed. + ClusterAutoscalerNeeded ClusterAutoscalerConditionStatus = "Needed" + // ClusterAutoscalerNotNeeded status means that scale up is not needed. + ClusterAutoscalerNotNeeded ClusterAutoscalerConditionStatus = "NotNeeded" + // ClusterAutoscalerInProgress status means that scale up is in progress. + ClusterAutoscalerInProgress ClusterAutoscalerConditionStatus = "InProgress" + // ClusterAutoscalerNoActivity status means that there has been no scale up activity recently. + ClusterAutoscalerNoActivity ClusterAutoscalerConditionStatus = "NoActivity" + // ClusterAutoscalerBackoff status means that due to a recently failed scale-up no further scale-ups attempts will be made for some time. + ClusterAutoscalerBackoff ClusterAutoscalerConditionStatus = "Backoff" +) + +// RegisteredUnreadyNodeCount contains node counts of registered but unready nodes. +type RegisteredUnreadyNodeCount struct { + // Total number of registered but unready nodes. + Total int `json:"total" yaml:"total"` + // ResourceUnready is the number of registered but unready nodes due to a missing resource (e.g. GPU). 
+	ResourceUnready int `json:"resourceUnready" yaml:"resourceUnready"`
+}
+
+// RegisteredNodeCount contains node counts of registered nodes.
+type RegisteredNodeCount struct {
+	Total      int `json:"total" yaml:"total"`
+	Ready      int `json:"ready" yaml:"ready"`
+	NotStarted int `json:"notStarted" yaml:"notStarted"`
+	// Number of nodes that are currently being deleted. They exist in K8S but are not included in NodeGroup.TargetSize().
+	BeingDeleted int                        `json:"beingDeleted,omitempty" yaml:"beingDeleted,omitempty"`
+	Unready      RegisteredUnreadyNodeCount `json:"unready,omitempty" yaml:"unready,omitempty"`
+}
+
+// NodeCount contains number of nodes that satisfy different criteria.
+type NodeCount struct {
+	Registered       RegisteredNodeCount `json:"registered,omitempty" yaml:"registered,omitempty"`
+	LongUnregistered int                 `json:"longUnregistered" yaml:"longUnregistered"`
+	Unregistered     int                 `json:"unregistered" yaml:"unregistered"`
+}
+
+// ClusterHealthCondition contains information about health condition for the whole cluster.
+type ClusterHealthCondition struct {
+	// Status of cluster health.
+	Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
+	// NodeCounts contains number of nodes that satisfy different criteria in the cluster.
+	NodeCounts NodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
+	// LastProbeTime is the last time we probed the condition.
+	LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
+	// LastTransitionTime is the time since when the condition was in the given state.
+	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
+}
+
+// NodeGroupHealthCondition contains information about health condition for a node group.
+type NodeGroupHealthCondition struct {
+	// Status of node group health.
+	Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
+	// NodeCounts contains number of nodes that satisfy different criteria in the node group.
+	NodeCounts NodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
+	// CloudProviderTarget is the target size set by cloud provider.
+	CloudProviderTarget int `json:"cloudProviderTarget" yaml:"cloudProviderTarget"`
+	// MinSize is the CA min size of a node group.
+	MinSize int `json:"minSize" yaml:"minSize"`
+	// MaxSize is the CA max size of a node group.
+	MaxSize int `json:"maxSize" yaml:"maxSize"`
+	// LastProbeTime is the last time we probed the condition.
+	LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
+	// LastTransitionTime is the time since when the condition was in the given state.
+	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
+}
+
+// ClusterScaleUpCondition contains information about scale up condition for the whole cluster.
+type ClusterScaleUpCondition struct {
+	// Status of the scale up.
+	Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
+	// LastProbeTime is the last time we probed the condition.
+	LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
+	// LastTransitionTime is the time since when the condition was in the given state.
+	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
+}
+
+// BackoffInfo contains error information that caused the backoff.
+type BackoffInfo struct {
+	// ErrorCode is a specific error code for the error condition.
+	ErrorCode string `json:"errorCode,omitempty" yaml:"errorCode,omitempty"`
+	// ErrorMessage is a human readable description of the error condition.
+	ErrorMessage string `json:"errorMessage,omitempty" yaml:"errorMessage,omitempty"`
+}
+
+// NodeGroupScaleUpCondition contains information about scale up condition for a node group.
+type NodeGroupScaleUpCondition struct {
+	// Status of the scale up.
+	Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
+	// BackoffInfo contains error information that caused the backoff, if any.
+	BackoffInfo BackoffInfo `json:"backoffInfo,omitempty" yaml:"backoffInfo,omitempty"`
+	// LastProbeTime is the last time we probed the condition.
+	LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
+	// LastTransitionTime is the time since when the condition was in the given state.
+	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
+}
+
+// ScaleDownCondition contains information about scale down condition for a node group or the whole cluster.
+type ScaleDownCondition struct {
+	// Status of the scale down.
+	Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
+	// Candidates is the number of candidates for the scale down.
+	Candidates int `json:"candidates,omitempty" yaml:"candidates,omitempty"`
+	// LastProbeTime is the last time we probed the condition.
+	LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
+	// LastTransitionTime is the time since when the condition was in the given state.
+	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
+}
+
+// ClusterWideStatus contains status that applies to the whole cluster.
+type ClusterWideStatus struct {
+	// Health contains information about health condition of the cluster.
+	Health ClusterHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
+	// ScaleUp contains information about scale up condition of the cluster.
+	ScaleUp ClusterScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
+	// ScaleDown contains information about scale down condition of the cluster.
+	ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
+}
+
+// NodeGroupStatus contains status of an individual node group on which CA works.
+type NodeGroupStatus struct {
+	// Name of the node group.
+	Name string `json:"name,omitempty" yaml:"name,omitempty"`
+	// Health contains information about health condition of the node group.
+	Health NodeGroupHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
+	// ScaleUp contains information about scale up condition of the node group.
+	ScaleUp NodeGroupScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
+	// ScaleDown contains information about scale down condition of the node group.
+	ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
+}
+
+// ClusterAutoscalerStatus contains ClusterAutoscaler status.
+type ClusterAutoscalerStatus struct {
+	// Time of the cluster autoscaler status.
+	Time string `json:"time,omitempty" yaml:"time,omitempty"`
+	// AutoscalerStatus contains status of ClusterAutoscaler (e.g. 'Initializing' & 'Running').
+ AutoscalerStatus ClusterAutoscalerStatusCondition `json:"autoscalerStatus,omitempty" yaml:"autoscalerStatus,omitempty"` + // Message contains extra information about the status. + Message string `json:"message,omitempty" yaml:"message,omitempty"` + // ClusterWide contains conditions that apply to the whole cluster. + ClusterWide ClusterWideStatus `json:"clusterWide,omitempty" yaml:"clusterWide,omitempty"` + // NodeGroups contains status information of individual node groups on which CA works. + NodeGroups []NodeGroupStatus `json:"nodeGroups,omitempty" yaml:"nodeGroups,omitempty"` +}
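
The `ClusterAutoscalerStatus` types above mirror the structures written by cluster-autoscaler 1.30+ and are shared by both parsers exercised in the tests: `ParseYamlStatus` for the YAML status configmap and `ParseReadableStatus` for the legacy readable format. Below is a minimal sketch of how a caller might consume them; the import path and the inline status payload are illustrative assumptions, while `ParseYamlStatus`, `ParseReadableStatus`, `GetNodeGroupWithName` and the status types come from this change.

```go
package main

import (
	"fmt"
	"log"

	// Assumed module path for the package introduced in this diff; adjust to
	// the repository's actual module path.
	"github.com/quortex/kubestitute/utils/clusterautoscaler"
)

// Illustrative payload following the YAML layout used by the yamlStatus test
// fixture above (only a subset of fields is set; the rest stay zero-valued).
const rawStatus = `time: "2020-11-25 08:19:44.090873082 +0000 UTC"
autoscalerStatus: Running
clusterWide:
  health:
    status: Healthy
nodeGroups:
- name: foo
  health:
    status: Healthy
    nodeCounts:
      registered:
        total: 5
        ready: 4
    cloudProviderTarget: 5
  scaleUp:
    status: NoActivity
`

func main() {
	// Pick the parser that matches the configmap format: ParseYamlStatus for
	// cluster-autoscaler 1.30+, ParseReadableStatus for the legacy readable format.
	status, err := clusterautoscaler.ParseYamlStatus(rawStatus)
	if err != nil {
		log.Fatalf("failed to parse cluster-autoscaler status: %v", err)
	}

	// Both parsers return the same *ClusterAutoscalerStatus, so node groups can
	// be inspected uniformly regardless of the source format.
	if ng := clusterautoscaler.GetNodeGroupWithName(status.NodeGroups, "foo"); ng != nil {
		fmt.Printf("node group %s: ready=%d target=%d scaleUp=%s\n",
			ng.Name,
			ng.Health.NodeCounts.Registered.Ready,
			ng.Health.CloudProviderTarget,
			ng.ScaleUp.Status,
		)
	}
}
```

Note that `Time` is kept as a plain string, matching the upstream types, while the per-condition timestamps (`LastProbeTime`, `LastTransitionTime`) use `metav1.Time`, so they are decoded into Kubernetes time values by either parser.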