Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pause metric in prometheus for scaledobject #4446

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ To learn more about active deprecations, we recommend checking [GitHub Discussio
- **CPU/Memory scaler**: Add support for scale to zero if there are multiple triggers([#4269](https://github.com/kedacore/keda/issues/4269))
- **Redis Scalers**: Allow scaling using redis stream length ([#4277](https://github.com/kedacore/keda/issues/4277))
- **General:** Introduce new Solr Scaler ([#4234](https://github.com/kedacore/keda/issues/4234))
- **Prometheus Metrics**: Introduce number of paused ScaledObjects in Prometheus metrics ([#4430](https://github.com/kedacore/keda/issues/4430))


### Improvements

Expand Down
16 changes: 15 additions & 1 deletion controllers/keda/scaledobject_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,11 @@ func (r *ScaledObjectReconciler) Reconcile(ctx context.Context, req ctrl.Request
conditions.SetReadyCondition(metav1.ConditionTrue, kedav1alpha1.ScaledObjectConditionReadySucccesReason, msg)
}

if _, present := scaledObject.GetAnnotations()[kedacontrollerutil.PausedReplicasAnnotation]; present {
ref := scaledObject.Spec.ScaleTargetRef
reqLogger.Info("Detect ScaledObject with pause annotation", "targetKind", ref.Kind, "targetName", ref.Name)
}

if err := kedacontrollerutil.SetStatusConditions(ctx, r.Client, reqLogger, scaledObject, &conditions); err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -523,10 +528,18 @@ func (r *ScaledObjectReconciler) updatePromMetrics(scaledObject *kedav1alpha1.Sc
}
metricsData.triggerTypes = triggerTypes

_, isPausedObject := scaledObject.GetAnnotations()[kedacontrollerutil.PausedReplicasAnnotation]
value := 0
if isPausedObject {
value = 1
}

prommetrics.RecordScalerPaused(namespacedName, scaledObject.Name, float64(value))

scaledObjectPromMetricsMap[namespacedName] = metricsData
}

func (r *ScaledObjectReconciler) updatePromMetricsOnDelete(namespacedName string) {
func (r *ScaledObjectReconciler) updatePromMetricsOnDelete(namespacedName string, scaledObject string) {
scaledObjectPromMetricsLock.Lock()
defer scaledObjectPromMetricsLock.Unlock()

Expand All @@ -535,6 +548,7 @@ func (r *ScaledObjectReconciler) updatePromMetricsOnDelete(namespacedName string
for _, triggerType := range metricsData.triggerTypes {
prommetrics.DecrementTriggerTotal(triggerType)
}
prommetrics.RecordScalerPaused(namespacedName, scaledObject, 0)
}

delete(scaledObjectPromMetricsMap, namespacedName)
Expand Down
2 changes: 1 addition & 1 deletion controllers/keda/scaledobject_finalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (r *ScaledObjectReconciler) finalizeScaledObject(ctx context.Context, logge
return err
}

r.updatePromMetricsOnDelete(namespacedName)
r.updatePromMetricsOnDelete(namespacedName, scaledObject.Name)
}

logger.Info("Successfully finalized ScaledObject")
Expand Down
17 changes: 17 additions & 0 deletions pkg/prommetrics/prommetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ var (
},
[]string{"type", "namespace"},
)

scaledObjectPausedTotal = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: DefaultPromMetricsNamespace,
Subsystem: "scaled_object",
Name: "paused",
Help: "Total number of paused scaled_objects",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The help message is wrong.

},
[]string{"namespace", "scaledObject"},
)
)

func init() {
Expand All @@ -118,6 +128,7 @@ func init() {
metrics.Registry.MustRegister(scalerActive)
metrics.Registry.MustRegister(scalerErrors)
metrics.Registry.MustRegister(scaledObjectErrors)
metrics.Registry.MustRegister(scaledObjectPausedTotal)

metrics.Registry.MustRegister(triggerTotalsGaugeVec)
metrics.Registry.MustRegister(crdTotalsGaugeVec)
Expand Down Expand Up @@ -204,3 +215,9 @@ func DecrementCRDTotal(crdType, namespace string) {

crdTotalsGaugeVec.WithLabelValues(crdType, namespace).Dec()
}

func RecordScalerPaused(namespace string, scaledObject string, value float64) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please rewrite this function to accept bool instead of float64? the same way RecordScalerActive() is written.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind doing it. I just can't figure out why the tests fails

labels := prometheus.Labels{"namespace": namespace, "scaledObject": scaledObject}

scaledObjectPausedTotal.With(labels).Set(value)
}
79 changes: 79 additions & 0 deletions tests/sequential/prometheus_metrics/prometheus_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,51 @@ spec:
name: {{.TestName}}-secret
key: key
---
`
deploymentForPausedTemplate = `
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{.DeploymentName}}-pause
namespace: {{.TestNamespace}}
labels:
app: {{.DeploymentName}}-pause
spec:
replicas: 1
selector:
matchLabels:
app: {{.DeploymentName}}-pause
template:
metadata:
labels:
app: {{.DeploymentName}}-pause
spec:
containers:
- name: {{.DeploymentName}}-pause
image: nginxinc/nginx-unprivileged
`

scaledObjectPausedTemplate = `
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: {{.ScaledObjectName}}-paused
namespace: {{.TestNamespace}}
annotations:
autoscaling.keda.sh/paused-replicas: "2"
spec:
scaleTargetRef:
name: {{.DeploymentName}}-pause
pollingInterval: 5
idleReplicaCount: 0
minReplicaCount: 1
maxReplicaCount: 2
cooldownPeriod: 10
triggers:
- type: kubernetes-workload
metadata:
podSelector: 'app={{.DeploymentName}}-pause'
value: '1'
`
)

Expand All @@ -265,6 +310,7 @@ func TestPrometheusMetrics(t *testing.T) {
testMetricsServerScalerMetricValue(t)
testOperatorMetrics(t, kc, data)
testWebhookMetrics(t, data)
testPauseMetric(t, data)

// cleanup
DeleteKubernetesResources(t, testNamespace, data, templates)
Expand Down Expand Up @@ -707,3 +753,36 @@ func checkWebhookValues(t *testing.T, families map[string]*promModel.MetricFamil
}
assert.GreaterOrEqual(t, metricValue, 1.0, "keda_webhook_scaled_object_validation_total has to be greater than 0")
}

func testPauseMetric(t *testing.T, data templateData) {
t.Log("--- testing scaleobject pause metric ---")

family := fetchAndParsePrometheusMetrics(t, fmt.Sprintf("curl --insecure %s", kedaOperatorPrometheusURL))

if _, ok := family["keda_scaled_object_paused"]; ok {
t.Errorf("metric should not be available")
}

KubectlApplyWithTemplate(t, data, "deploymentForPausedTemplate", deploymentForPausedTemplate)
KubectlApplyWithTemplate(t, data, "scaledObjectPausedTemplate", scaledObjectPausedTemplate)

family = fetchAndParsePrometheusMetrics(t, fmt.Sprintf("curl --insecure %s", kedaOperatorPrometheusURL))
Comment on lines +766 to +769
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the metric is checked too fast and KEDA doesn't have enough time to process the ScaledObject. I guess that you can introduce a sleep here (5 seconds should be enough) or maybe deploying this resources in the beginning of the test file instead of inside the test case


if val, ok := family["keda_scaled_object_paused"]; ok {
metrics := val.GetMetric()
for _, metric := range metrics {
assert.Equal(t, float64(1), *metric.Gauge.Value)
}
} else {
t.Errorf("metric not available")
}

KubectlDeleteWithTemplate(t, data, "deploymentForPausedTemplate", deploymentForPausedTemplate)
KubectlDeleteWithTemplate(t, data, "scaledObjectPausedTemplate", scaledObjectPausedTemplate)

family = fetchAndParsePrometheusMetrics(t, fmt.Sprintf("curl --insecure %s", kedaOperatorPrometheusURL))

if _, ok := family["keda_scaled_object_paused"]; ok {
t.Errorf("metric should not be available")
}
}