From 61accfd0591c184bf2585f5b6711b3871024f506 Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Thu, 1 Dec 2022 10:48:42 +0100 Subject: [PATCH] Add TargetMemoryUtilization metric for AutoScaling (#1223) * Add TargetMemoryUtilization metric for AutoScaling Signed-off-by: Kevin Earls * Add changes to v2beta2 as there is no way to un e2e tests just for one version Signed-off-by: Kevin Earls * See if we just have a race condition Signed-off-by: Kevin Earls * Reset kuttl timeout Signed-off-by: Kevin Earls * Add some debugging code to help analyze failures on github Signed-off-by: Kevin Earls * Try to appease the linter Signed-off-by: Kevin Earls * Restore autoscale tests Signed-off-by: Kevin Earls * Cleanup Signed-off-by: Kevin Earls * More cleanup Signed-off-by: Kevin Earls * Respond to comments Signed-off-by: Kevin Earls * Cleanup whitespace so linter will rerun Signed-off-by: Kevin Earls * Don't set TargetCPUUtilization to default if another metric is set Signed-off-by: Kevin Earls Signed-off-by: Kevin Earls --- apis/v1alpha1/opentelemetrycollector_types.go | 3 + .../opentelemetrycollector_webhook.go | 14 +++-- apis/v1alpha1/zz_generated.deepcopy.go | 5 ++ ...ntelemetry.io_opentelemetrycollectors.yaml | 5 ++ ...ntelemetry.io_opentelemetrycollectors.yaml | 5 ++ docs/api.md | 9 +++ pkg/collector/horizontalpodautoscaler.go | 58 ++++++++++++++----- pkg/collector/horizontalpodautoscaler_test.go | 28 ++++++--- .../reconcile/horizontalpodautoscaler.go | 11 +++- tests/e2e/autoscale/00-install.yaml | 3 + tests/e2e/autoscale/01-assert.yaml | 1 + tests/e2e/autoscale/02-assert.yaml | 1 + 12 files changed, 116 insertions(+), 27 deletions(-) diff --git a/apis/v1alpha1/opentelemetrycollector_types.go b/apis/v1alpha1/opentelemetrycollector_types.go index 0aac34ca75..a296381b23 100644 --- a/apis/v1alpha1/opentelemetrycollector_types.go +++ b/apis/v1alpha1/opentelemetrycollector_types.go @@ -277,6 +277,9 @@ type AutoscalerSpec struct { // If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent. // +optional TargetCPUUtilization *int32 `json:"targetCPUUtilization,omitempty"` + // +optional + // TargetMemoryUtilization sets the target average memory utilization across all replicas + TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"` } func init() { diff --git a/apis/v1alpha1/opentelemetrycollector_webhook.go b/apis/v1alpha1/opentelemetrycollector_webhook.go index fd4c5bd6a9..ea41f2b103 100644 --- a/apis/v1alpha1/opentelemetrycollector_webhook.go +++ b/apis/v1alpha1/opentelemetrycollector_webhook.go @@ -67,13 +67,15 @@ func (r *OpenTelemetryCollector) Default() { r.Spec.TargetAllocator.Replicas = &one } - // Set default targetCPUUtilization for autoscaler - if r.Spec.MaxReplicas != nil && (r.Spec.Autoscaler == nil || r.Spec.Autoscaler.TargetCPUUtilization == nil) { - defaultCPUTarget := int32(90) + if r.Spec.MaxReplicas != nil { if r.Spec.Autoscaler == nil { r.Spec.Autoscaler = &AutoscalerSpec{} } - r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget + + if r.Spec.Autoscaler.TargetMemoryUtilization == nil && r.Spec.Autoscaler.TargetCPUUtilization == nil { + defaultCPUTarget := int32(90) + r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget + } } } @@ -176,7 +178,9 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error { if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetCPUUtilization != nil && (*r.Spec.Autoscaler.TargetCPUUtilization < int32(1) || *r.Spec.Autoscaler.TargetCPUUtilization > int32(99)) { return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetCPUUtilization should be greater than 0 and less than 100") } - + if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) || *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) { + return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100") + } } if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar { diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go index 65bd3901d8..01c5f1dbdc 100644 --- a/apis/v1alpha1/zz_generated.deepcopy.go +++ b/apis/v1alpha1/zz_generated.deepcopy.go @@ -39,6 +39,11 @@ func (in *AutoscalerSpec) DeepCopyInto(out *AutoscalerSpec) { *out = new(int32) **out = **in } + if in.TargetMemoryUtilization != nil { + in, out := &in.TargetMemoryUtilization, &out.TargetMemoryUtilization + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalerSpec. diff --git a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml index 41d2e014ae..38dbce2c79 100644 --- a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml +++ b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml @@ -1016,6 +1016,11 @@ spec: the HPA will scale up. Defaults to 90 percent. format: int32 type: integer + targetMemoryUtilization: + description: TargetMemoryUtilization sets the target average memory + utilization across all replicas + format: int32 + type: integer type: object config: description: Config is the raw JSON to be used as the collector's diff --git a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml index 068df5d1ad..c6990780b0 100644 --- a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml +++ b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml @@ -1014,6 +1014,11 @@ spec: the HPA will scale up. Defaults to 90 percent. format: int32 type: integer + targetMemoryUtilization: + description: TargetMemoryUtilization sets the target average memory + utilization across all replicas + format: int32 + type: integer type: object config: description: Config is the raw JSON to be used as the collector's diff --git a/docs/api.md b/docs/api.md index 87358baaf8..9359a696c4 100644 --- a/docs/api.md +++ b/docs/api.md @@ -3228,6 +3228,15 @@ Autoscaler specifies the pod autoscaling configuration to use for the OpenTeleme Format: int32
false + + targetMemoryUtilization + integer + + TargetMemoryUtilization sets the target average memory utilization across all replicas
+
+ Format: int32
+ + false diff --git a/pkg/collector/horizontalpodautoscaler.go b/pkg/collector/horizontalpodautoscaler.go index 8fc133ab1b..c7205562d9 100644 --- a/pkg/collector/horizontalpodautoscaler.go +++ b/pkg/collector/horizontalpodautoscaler.go @@ -33,9 +33,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al labels := Labels(otelcol, cfg.LabelsFilter()) labels["app.kubernetes.io/name"] = naming.Collector(otelcol) - annotations := Annotations(otelcol) - var result client.Object objectMeta := metav1.ObjectMeta{ @@ -46,6 +44,22 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al } if autoscalingVersion == autodetect.AutoscalingVersionV2Beta2 { + metrics := []autoscalingv2beta2.MetricSpec{} + + if otelcol.Spec.Autoscaler.TargetMemoryUtilization != nil { + utilizationTarget := autoscalingv2beta2.MetricSpec{ + Type: autoscalingv2beta2.ResourceMetricSourceType, + Resource: &autoscalingv2beta2.ResourceMetricSource{ + Name: corev1.ResourceMemory, + Target: autoscalingv2beta2.MetricTarget{ + Type: autoscalingv2beta2.UtilizationMetricType, + AverageUtilization: otelcol.Spec.Autoscaler.TargetMemoryUtilization, + }, + }, + } + metrics = append(metrics, utilizationTarget) + } + targetCPUUtilization := autoscalingv2beta2.MetricSpec{ Type: autoscalingv2beta2.ResourceMetricSourceType, Resource: &autoscalingv2beta2.ResourceMetricSource{ @@ -56,7 +70,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al }, }, } - metrics := []autoscalingv2beta2.MetricSpec{targetCPUUtilization} + metrics = append(metrics, targetCPUUtilization) autoscaler := autoscalingv2beta2.HorizontalPodAutoscaler{ ObjectMeta: objectMeta, @@ -79,17 +93,35 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al result = &autoscaler } else { - targetCPUUtilization := autoscalingv2.MetricSpec{ - Type: autoscalingv2.ResourceMetricSourceType, - Resource: &autoscalingv2.ResourceMetricSource{ - Name: corev1.ResourceCPU, - Target: autoscalingv2.MetricTarget{ - Type: autoscalingv2.UtilizationMetricType, - AverageUtilization: otelcol.Spec.Autoscaler.TargetCPUUtilization, + metrics := []autoscalingv2.MetricSpec{} + + if otelcol.Spec.Autoscaler.TargetMemoryUtilization != nil { + utilizationTarget := autoscalingv2.MetricSpec{ + Type: autoscalingv2.ResourceMetricSourceType, + Resource: &autoscalingv2.ResourceMetricSource{ + Name: corev1.ResourceMemory, + Target: autoscalingv2.MetricTarget{ + Type: autoscalingv2.UtilizationMetricType, + AverageUtilization: otelcol.Spec.Autoscaler.TargetMemoryUtilization, + }, }, - }, + } + metrics = append(metrics, utilizationTarget) + } + + if otelcol.Spec.Autoscaler.TargetCPUUtilization != nil { + targetCPUUtilization := autoscalingv2.MetricSpec{ + Type: autoscalingv2.ResourceMetricSourceType, + Resource: &autoscalingv2.ResourceMetricSource{ + Name: corev1.ResourceCPU, + Target: autoscalingv2.MetricTarget{ + Type: autoscalingv2.UtilizationMetricType, + AverageUtilization: otelcol.Spec.Autoscaler.TargetCPUUtilization, + }, + }, + } + metrics = append(metrics, targetCPUUtilization) } - metrics := []autoscalingv2.MetricSpec{targetCPUUtilization} autoscaler := autoscalingv2.HorizontalPodAutoscaler{ ObjectMeta: objectMeta, @@ -104,7 +136,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al Metrics: metrics, }, } - if otelcol.Spec.Autoscaler != nil && otelcol.Spec.Autoscaler.Behavior != nil { + if otelcol.Spec.Autoscaler.Behavior != nil { autoscaler.Spec.Behavior = otelcol.Spec.Autoscaler.Behavior } result = &autoscaler diff --git a/pkg/collector/horizontalpodautoscaler_test.go b/pkg/collector/horizontalpodautoscaler_test.go index 04b90b3480..aa5e1c173b 100644 --- a/pkg/collector/horizontalpodautoscaler_test.go +++ b/pkg/collector/horizontalpodautoscaler_test.go @@ -41,7 +41,8 @@ func TestHPA(t *testing.T) { var minReplicas int32 = 3 var maxReplicas int32 = 5 - var cpuUtilization int32 = 90 + var cpuUtilization int32 = 66 + var memoryUtilization int32 = 77 otelcol := v1alpha1.OpenTelemetryCollector{ ObjectMeta: metav1.ObjectMeta{ @@ -51,7 +52,8 @@ func TestHPA(t *testing.T) { Replicas: &minReplicas, MaxReplicas: &maxReplicas, Autoscaler: &v1alpha1.AutoscalerSpec{ - TargetCPUUtilization: &cpuUtilization, + TargetCPUUtilization: &cpuUtilization, + TargetMemoryUtilization: &memoryUtilization, }, }, } @@ -76,9 +78,13 @@ func TestHPA(t *testing.T) { assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"]) assert.Equal(t, int32(3), *hpa.Spec.MinReplicas) assert.Equal(t, int32(5), hpa.Spec.MaxReplicas) - assert.Equal(t, 1, len(hpa.Spec.Metrics)) - assert.Equal(t, corev1.ResourceCPU, hpa.Spec.Metrics[0].Resource.Name) - assert.Equal(t, int32(90), *hpa.Spec.Metrics[0].Resource.Target.AverageUtilization) + for _, metric := range hpa.Spec.Metrics { + if metric.Resource.Name == corev1.ResourceCPU { + assert.Equal(t, cpuUtilization, *metric.Resource.Target.AverageUtilization) + } else if metric.Resource.Name == corev1.ResourceMemory { + assert.Equal(t, memoryUtilization, *metric.Resource.Target.AverageUtilization) + } + } } else { hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler) @@ -87,9 +93,15 @@ func TestHPA(t *testing.T) { assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"]) assert.Equal(t, int32(3), *hpa.Spec.MinReplicas) assert.Equal(t, int32(5), hpa.Spec.MaxReplicas) - assert.Equal(t, 1, len(hpa.Spec.Metrics)) - assert.Equal(t, corev1.ResourceCPU, hpa.Spec.Metrics[0].Resource.Name) - assert.Equal(t, int32(90), *hpa.Spec.Metrics[0].Resource.Target.AverageUtilization) + assert.Equal(t, 2, len(hpa.Spec.Metrics)) + + for _, metric := range hpa.Spec.Metrics { + if metric.Resource.Name == corev1.ResourceCPU { + assert.Equal(t, cpuUtilization, *metric.Resource.Target.AverageUtilization) + } else if metric.Resource.Name == corev1.ResourceMemory { + assert.Equal(t, memoryUtilization, *metric.Resource.Target.AverageUtilization) + } + } } }) } diff --git a/pkg/collector/reconcile/horizontalpodautoscaler.go b/pkg/collector/reconcile/horizontalpodautoscaler.go index 1f8435ff11..d5df719b0c 100644 --- a/pkg/collector/reconcile/horizontalpodautoscaler.go +++ b/pkg/collector/reconcile/horizontalpodautoscaler.go @@ -20,6 +20,7 @@ import ( autoscalingv2 "k8s.io/api/autoscaling/v2" autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2" + corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/types" @@ -127,7 +128,15 @@ func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingV } else { updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MinReplicas = &one } - updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics[0].Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization + + // This will update memory and CPU usage for now, and can be used to update other metrics in the future + for _, metric := range updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics { + if metric.Resource.Name == corev1.ResourceCPU { + metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization + } else if metric.Resource.Name == corev1.ResourceMemory { + metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetMemoryUtilization + } + } } } } diff --git a/tests/e2e/autoscale/00-install.yaml b/tests/e2e/autoscale/00-install.yaml index 04e52e7be9..15f4aed640 100644 --- a/tests/e2e/autoscale/00-install.yaml +++ b/tests/e2e/autoscale/00-install.yaml @@ -1,3 +1,6 @@ +# This creates two different deployments. The first one will be used to see if we scale properly. (Note that we are +# only scaling up to 2 because of limitations of KUTTL). The second is to check the targetCPUUtilization option. +# apiVersion: opentelemetry.io/v1alpha1 kind: OpenTelemetryCollector metadata: diff --git a/tests/e2e/autoscale/01-assert.yaml b/tests/e2e/autoscale/01-assert.yaml index 2c2eec0238..76c712f7af 100644 --- a/tests/e2e/autoscale/01-assert.yaml +++ b/tests/e2e/autoscale/01-assert.yaml @@ -1,3 +1,4 @@ +# Wait until tracegen has completed and the simplest deployment has scaled up to 2 apiVersion: batch/v1 kind: Job metadata: diff --git a/tests/e2e/autoscale/02-assert.yaml b/tests/e2e/autoscale/02-assert.yaml index fb4e052f2b..c610ad3f0e 100644 --- a/tests/e2e/autoscale/02-assert.yaml +++ b/tests/e2e/autoscale/02-assert.yaml @@ -1,3 +1,4 @@ +# Wait for the collector to scale back down to 1 apiVersion: opentelemetry.io/v1alpha1 kind: OpenTelemetryCollector