Skip to content

Commit 37d70b3

Browse files
deliahuMiguel Varela Ramos
authored and
Miguel Varela Ramos
committed
Add active and queued requests to async dashboard (#2326)
1 parent 3301262 commit 37d70b3

File tree

5 files changed

+57
-30
lines changed

5 files changed

+57
-30
lines changed

dev/prometheus.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@ The following is a list of metrics that are currently in use.
1414
1. api_name
1515
1. api_kind
1616
1. status_code
17-
1. cortex_async_queue_length with the following labels:
17+
1. cortex_async_active with the following labels:
18+
1. api_name
19+
1. api_kind
20+
1. cortex_async_queued with the following labels:
21+
1. api_name
22+
1. api_kind
23+
1. cortex_async_in_flight with the following labels:
1824
1. api_name
1925
1. api_kind
2026
1. cortex_async_latency_bucket with the following labels:

manager/manifests/grafana/grafana-dashboard-async.yaml

+23-17
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ data:
3636
"editable": true,
3737
"gnetId": null,
3838
"graphTooltip": 0,
39-
"iteration": 1625168772532,
39+
"iteration": 1625805144458,
4040
"links": [],
4141
"panels": [
4242
{
@@ -61,10 +61,6 @@ data:
6161
{
6262
"collapsed": false,
6363
"datasource": null,
64-
"fieldConfig": {
65-
"defaults": {},
66-
"overrides": []
67-
},
6864
"gridPos": {
6965
"h": 1,
7066
"w": 24,
@@ -175,7 +171,7 @@ data:
175171
"dashLength": 10,
176172
"dashes": false,
177173
"datasource": null,
178-
"description": "Active in-flight requests for an API.\n\nNote: In-flight requests are recorded every 10 seconds, which will correspond to the minimum resolution.",
174+
"description": "In-flight requests for an API.\n\nNote: In-flight requests are recorded every 10 seconds, which will correspond to the minimum resolution.",
179175
"fill": 1,
180176
"fillGradient": 0,
181177
"gridPos": {
@@ -213,10 +209,28 @@ data:
213209
"steppedLine": false,
214210
"targets": [
215211
{
216-
"expr": "sum(cortex_async_queue_length{api_kind=\"AsyncAPI\",api_name=\"$api_name\"}) by (api_name)",
212+
"exemplar": true,
213+
"expr": "sum(cortex_async_active{api_kind=\"AsyncAPI\",api_name=\"$api_name\"}) by (api_name)",
214+
"hide": false,
217215
"interval": "",
218-
"legendFormat": "{{api_name}}",
219-
"refId": "A"
216+
"legendFormat": "active",
217+
"refId": "Active"
218+
},
219+
{
220+
"exemplar": true,
221+
"expr": "sum(cortex_async_queued{api_kind=\"AsyncAPI\",api_name=\"$api_name\"}) by (api_name)",
222+
"hide": false,
223+
"interval": "",
224+
"legendFormat": "queued",
225+
"refId": "Queued"
226+
},
227+
{
228+
"exemplar": true,
229+
"expr": "sum(cortex_async_in_flight{api_kind=\"AsyncAPI\",api_name=\"$api_name\"}) by (api_name)",
230+
"hide": true,
231+
"interval": "",
232+
"legendFormat": "in flight",
233+
"refId": "In Flight"
220234
}
221235
],
222236
"thresholds": [],
@@ -1014,10 +1028,6 @@ data:
10141028
{
10151029
"collapsed": false,
10161030
"datasource": null,
1017-
"fieldConfig": {
1018-
"defaults": {},
1019-
"overrides": []
1020-
},
10211031
"gridPos": {
10221032
"h": 1,
10231033
"w": 24,
@@ -1445,10 +1455,6 @@ data:
14451455
{
14461456
"collapsed": false,
14471457
"datasource": null,
1448-
"fieldConfig": {
1449-
"defaults": {},
1450-
"overrides": []
1451-
},
14521458
"gridPos": {
14531459
"h": 1,
14541460
"w": 24,

pkg/autoscaler/async_scaler.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ func (s *AsyncScaler) GetInFlightRequests(apiName string, window time.Duration)
7070
windowSeconds := int64(window.Seconds())
7171

7272
// PromQL query:
73-
// sum(sum_over_time(cortex_async_queue_length{api_name="<apiName>"}[60s])) /
74-
// sum(count_over_time(cortex_async_queue_length{api_name="<apiName>"}[60s]))
73+
// sum(sum_over_time(cortex_async_in_flight{api_name="<apiName>"}[60s])) /
74+
// max(count_over_time(cortex_async_in_flight{api_name="<apiName>"}[60s]))
7575
query := fmt.Sprintf(
76-
"sum(sum_over_time(cortex_async_queue_length{api_name=\"%s\"}[%ds])) / "+
77-
"max(count_over_time(cortex_async_queue_length{api_name=\"%s\"}[%ds]))",
76+
"sum(sum_over_time(cortex_async_in_flight{api_name=\"%s\"}[%ds])) / "+
77+
"max(count_over_time(cortex_async_in_flight{api_name=\"%s\"}[%ds]))",
7878
apiName, windowSeconds,
7979
apiName, windowSeconds,
8080
)

pkg/health/health.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,7 @@ func Check(awsClient *awslib.Client, k8sClient *k8s.Client, clusterName string)
215215
}
216216

217217
func GetWarnings(k8sClient *k8s.Client) (ClusterWarnings, error) {
218-
var (
219-
prometheusMemorySaturationWarn string
220-
)
218+
var prometheusMemorySaturationWarn string
221219

222220
saturation, err := getPodMemorySaturation(k8sClient, "prometheus-prometheus-0", "default")
223221
if err != nil {

pkg/operator/resources/asyncapi/queue_metrics.go

+22-5
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,26 @@ const (
3333
_sqsQueryTimeoutSeconds = 10
3434
)
3535

36-
var queueLengthGauge = promauto.NewGaugeVec(
36+
var activeGauge = promauto.NewGaugeVec(
3737
prometheus.GaugeOpts{
38-
Name: "cortex_async_queue_length",
39-
Help: "The number of in-queue messages for a cortex AsyncAPI",
38+
Name: "cortex_async_active",
39+
Help: "The number of messages that are actively being processed by an AsyncAPI",
40+
ConstLabels: map[string]string{"api_kind": userconfig.AsyncAPIKind.String()},
41+
}, []string{"api_name"},
42+
)
43+
44+
var queuedGauge = promauto.NewGaugeVec(
45+
prometheus.GaugeOpts{
46+
Name: "cortex_async_queued",
47+
Help: "The number of queued messages for an AsyncAPI",
48+
ConstLabels: map[string]string{"api_kind": userconfig.AsyncAPIKind.String()},
49+
}, []string{"api_name"},
50+
)
51+
52+
var inFlightGauge = promauto.NewGaugeVec(
53+
prometheus.GaugeOpts{
54+
Name: "cortex_async_in_flight",
55+
Help: "The number of in-flight messages for an AsyncAPI (including active and queued)",
4056
ConstLabels: map[string]string{"api_kind": userconfig.AsyncAPIKind.String()},
4157
}, []string{"api_name"},
4258
)
@@ -74,8 +90,9 @@ func updateQueueLengthMetricsFn(apiName, queueURL string) func() error {
7490
return err
7591
}
7692

77-
queueLength := visibleMessages + invisibleMessages
78-
queueLengthGauge.WithLabelValues(apiName).Set(queueLength)
93+
activeGauge.WithLabelValues(apiName).Set(invisibleMessages)
94+
queuedGauge.WithLabelValues(apiName).Set(visibleMessages)
95+
inFlightGauge.WithLabelValues(apiName).Set(invisibleMessages + visibleMessages)
7996

8097
return nil
8198
}

0 commit comments

Comments
 (0)