From f4d5bcc0182e332b0db381d32bbec16ea7858c17 Mon Sep 17 00:00:00 2001
From: Michel Hollands
Date: Thu, 27 Jul 2023 13:48:42 +0100
Subject: [PATCH] Use Loki SSD dashboards

Signed-off-by: Michel Hollands
---
 .../src/dashboards/loki-deletion.json         | 366 +++++++++++++++++-
 .../src/dashboards/loki-logs.json             |   3 +-
 .../loki-mixin-recording-rules.json           | 186 ++++++---
 .../src/dashboards/loki-operational.json      |  57 ++-
 .../src/dashboards/loki-reads-resources.json  | 135 +++----
 .../src/dashboards/loki-reads.json            |   6 +-
 .../src/dashboards/loki-retention.json        |  28 +-
 .../src/dashboards/loki-writes-resources.json |  26 ++
 .../src/dashboards/loki-writes.json           |   6 +-
 9 files changed, 623 insertions(+), 190 deletions(-)

diff --git a/charts/meta-monitoring/src/dashboards/loki-deletion.json b/charts/meta-monitoring/src/dashboards/loki-deletion.json
index 84bfee6..d66419f 100644
--- a/charts/meta-monitoring/src/dashboards/loki-deletion.json
+++ b/charts/meta-monitoring/src/dashboards/loki-deletion.json
@@ -217,7 +217,83 @@
           "renderer": "flot",
           "seriesOverrides": [ ],
           "spaceLength": 10,
-          "span": 6,
+          "span": 4,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "in progress",
+              "legendLink": null,
+              "step": 10
+            }
+          ],
+          "thresholds": [ ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "# of Delete Requests (received - processed) ",
+          "tooltip": {
+            "shared": true,
+            "sort": 2,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": [ ]
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": { },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "$datasource",
+          "fill": 1,
+          "id": 4,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [ ],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [ ],
+          "spaceLength": 10,
+          "span": 4,
           "stack": false,
           "steppedLine": false,
           "targets": [
@@ -273,7 +349,7 @@
           "dashes": false,
           "datasource": "$datasource",
           "fill": 1,
-          "id": 4,
+          "id": 5,
           "legend": {
             "avg": false,
             "current": false,
@@ -293,7 +369,7 @@
           "renderer": "flot",
           "seriesOverrides": [ ],
           "spaceLength": 10,
-          "span": 6,
+          "span": 4,
           "stack": false,
           "steppedLine": false,
           "targets": [
@@ -361,7 +437,7 @@
           "dashes": false,
           "datasource": "$datasource",
           "fill": 1,
-          "id": 5,
+          "id": 6,
           "legend": {
             "avg": false,
             "current": false,
@@ -381,15 +457,15 @@
           "renderer": "flot",
           "seriesOverrides": [ ],
           "spaceLength": 10,
-          "span": 12,
+          "span": 4,
           "stack": false,
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))",
+              "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}",
               "format": "time_series",
               "intervalFactor": 2,
-              "legendFormat": "failures",
+              "legendFormat": "{{pod}}",
               "legendLink": null,
               "step": 10
             }
@@ -397,7 +473,159 @@
           "thresholds": [ ],
           "timeFrom": null,
           "timeShift": null,
-          "title": "Failures in Loading Delete Requests / Hour",
+          "title": "Compactor CPU usage",
+          "tooltip": {
+            "shared": true,
+            "sort": 2,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": [ ]
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": { },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "$datasource",
+          "fill": 1,
+          "id": 7,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [ ],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [ ],
+          "spaceLength": 10,
+          "span": 4,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": " {{pod}} ",
+              "legendLink": null,
+              "step": 10
+            }
+          ],
+          "thresholds": [ ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Compactor memory usage (MiB)",
+          "tooltip": {
+            "shared": true,
+            "sort": 2,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": [ ]
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": { },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "$datasource",
+          "fill": 1,
+          "id": 8,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [ ],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [ ],
+          "spaceLength": 10,
+          "span": 4,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "{{pod}}",
+              "legendLink": null,
+              "step": 10
+            }
+          ],
+          "thresholds": [ ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Compaction run duration (seconds)",
           "tooltip": {
             "shared": true,
             "sort": 2,
@@ -435,7 +663,7 @@
       "repeatIteration": null,
       "repeatRowId": null,
       "showTitle": true,
-      "title": "Failures",
+      "title": "Compactor",
       "titleSize": "h6"
     },
     {
@@ -449,7 +677,7 @@
           "dashes": false,
           "datasource": "$datasource",
           "fill": 1,
-          "id": 6,
+          "id": 9,
           "legend": {
             "avg": false,
             "current": false,
@@ -469,7 +697,83 @@
           "renderer": "flot",
           "seriesOverrides": [ ],
           "spaceLength": 10,
-          "span": 12,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "failures",
+              "legendLink": null,
+              "step": 10
+            }
+          ],
+          "thresholds": [ ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Failures in Loading Delete Requests / Hour",
+          "tooltip": {
+            "shared": true,
+            "sort": 2,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": [ ]
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": { },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "$datasource",
+          "fill": 1,
+          "id": 10,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [ ],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [ ],
+          "spaceLength": 10,
+          "span": 6,
           "stack": false,
           "steppedLine": false,
           "targets": [
@@ -523,7 +827,45 @@
       "repeatIteration": null,
       "repeatRowId": null,
       "showTitle": true,
-      "title": "Deleted lines",
+      "title": "Deletion metrics",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "datasource": "$loki_datasource",
+          "id": 11,
+          "span": 6,
+          "targets": [
+            {
+              "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ",
+              "refId": "A"
+            }
+          ],
+          "title": "In progress/finished",
+          "type": "logs"
+        },
+        {
+          "datasource": "$loki_datasource",
+          "id": 12,
+          "span": 6,
+          "targets": [
+            {
+              "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"",
+              "refId": "A"
+            }
+          ],
+          "title": "Requests",
+          "type": "logs"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": true,
+      "title": "List of deletion requests",
       "titleSize": "h6"
     }
   ],
diff --git a/charts/meta-monitoring/src/dashboards/loki-logs.json b/charts/meta-monitoring/src/dashboards/loki-logs.json
index c09d154..e2e9e88 100644
--- a/charts/meta-monitoring/src/dashboards/loki-logs.json
+++ b/charts/meta-monitoring/src/dashboards/loki-logs.json
@@ -6,7 +6,6 @@
   "gnetId": null,
   "graphTooltip": 0,
   "hideControls": false,
-  "id": 8,
   "iteration": 1583185057230,
   "links": [
     {
@@ -1070,4 +1069,4 @@
   "title": "Loki / Logs",
   "uid": "logs",
   "version": 0
-}
\ No newline at end of file
+}
diff --git a/charts/meta-monitoring/src/dashboards/loki-mixin-recording-rules.json b/charts/meta-monitoring/src/dashboards/loki-mixin-recording-rules.json
index fe49ec7..3bb931f 100644
--- a/charts/meta-monitoring/src/dashboards/loki-mixin-recording-rules.json
+++ b/charts/meta-monitoring/src/dashboards/loki-mixin-recording-rules.json
@@ -1,41 +1,27 @@
 {
   "annotations": {
-    "list": [
-      {
-        "builtIn": 1,
-        "datasource": "-- Grafana --",
-        "enable": true,
-        "hide": true,
-        "iconColor": "rgba(0, 211, 255, 1)",
-        "name": "Annotations & Alerts",
-        "target": {
-          "limit": 100,
-          "matchAny": false,
-          "tags": [ ],
-          "type": "dashboard"
-        },
-        "type": "dashboard"
-      },
-      {
-        "datasource": "${datasource}",
-        "enable": false,
-        "expr": "sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))",
-        "iconColor": "red",
-        "name": "WAL Truncations",
-        "target": {
-          "queryType": "Azure Monitor",
-          "refId": "Anno"
-        },
-        "titleFormat": "{{tenant}}"
-      }
-    ]
+    "list": [ ]
   },
   "editable": true,
   "fiscalYearStartMonth": 0,
   "gnetId": null,
   "graphTooltip": 0,
+  "hideControls": false,
   "iteration": 1635347545534,
-  "links": [ ],
+  "links": [
+    {
+      "asDropdown": true,
+      "icon": "external link",
+      "includeVars": true,
+      "keepTime": true,
+      "tags": [
+        "loki"
+      ],
+      "targetBlank": false,
+      "title": "Loki Dashboards",
+      "type": "dashboards"
+    }
+  ],
   "liveNow": false,
   "panels": [
     {
@@ -599,59 +585,139 @@
       "type": "timeseries"
     }
   ],
-  "schemaVersion": 31,
+  "refresh": "10s",
+  "rows": [ ],
+  "schemaVersion": 14,
   "style": "dark",
-  "tags": [ ],
+  "tags": [
+    "loki"
+  ],
   "templating": {
     "list": [
       {
-        "description": null,
-        "error": null,
+        "current": {
+          "text": "default",
+          "value": "default"
+        },
         "hide": 0,
-        "includeAll": false,
-        "label": "Datasource",
-        "multi": false,
+        "label": "Data Source",
         "name": "datasource",
         "options": [ ],
         "query": "prometheus",
-        "queryValue": "",
         "refresh": 1,
         "regex": "",
-        "skipUrlSync": false,
         "type": "datasource"
       },
       {
         "allValue": null,
-        "datasource": "${datasource}",
-        "definition": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
-        "description": null,
-        "error": null,
+        "current": {
+          "text": "prod",
+          "value": "prod"
+        },
+        "datasource": "$datasource",
         "hide": 0,
-        "includeAll": true,
-        "label": "Tenant",
-        "multi": true,
-        "name": "tenant",
+        "includeAll": false,
+        "label": "cluster",
+        "multi": false,
+        "name": "cluster",
         "options": [ ],
-        "query": {
-          "query": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
-          "refId": "StandardVariableQuery"
+        "query": "label_values(loki_build_info, cluster)",
+        "refresh": 1,
+        "regex": "",
+        "sort": 2,
+        "tagValuesQuery": "",
+        "tags": [ ],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "allValue": null,
+        "current": {
+          "text": "prod",
+          "value": "prod"
         },
-        "refresh": 2,
+        "datasource": "$datasource",
+        "hide": 0,
+        "includeAll": false,
+        "label": "namespace",
+        "multi": false,
+        "name": "namespace",
+        "options": [ ],
+        "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
+        "refresh": 1,
         "regex": "",
-        "skipUrlSync": false,
-        "sort": 0,
-        "type": "query"
+        "sort": 2,
+        "tagValuesQuery": "",
+        "tags": [ ],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "hide": 0,
+        "label": null,
+        "name": "loki_datasource",
+        "options": [ ],
+        "query": "loki",
+        "refresh": 1,
+        "regex": "",
+        "type": "datasource"
+      },
+      {
+        "allValue": ".+",
+        "current": { },
+        "datasource": "$datasource",
+        "hide": 0,
+        "includeAll": true,
+        "label": null,
+        "multi": false,
+        "name": "tenant",
+        "options": [ ],
+        "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))",
+        "refresh": 0,
+        "regex": "/\"([^\"]+)\"/",
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tags": [ ],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
       }
     ]
   },
   "time": {
-    "from": "now-6h",
+    "from": "now-1h",
     "to": "now"
   },
-  "timepicker": { },
-  "timezone": "",
-  "title": "Recording Rules",
-  "uid": "2xKA_ZK7k",
-  "version": 9,
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "utc",
+  "title": "Loki / Recording Rules",
+  "uid": "recording-rules",
+  "version": 0,
   "weekStart": ""
 }
\ No newline at end of file
diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki-operational.json
index b69db23..e845300 100644
--- a/charts/meta-monitoring/src/dashboards/loki-operational.json
+++ b/charts/meta-monitoring/src/dashboards/loki-operational.json
@@ -6,7 +6,6 @@
   "gnetId": null,
   "graphTooltip": 0,
   "hideControls": false,
-  "id": 68,
   "iteration": 1588704280892,
   "links": [
     {
@@ -567,17 +566,17 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
               "legendFormat": ".99",
               "refId": "A"
             },
             {
-              "expr": "histogram_quantile(0.75, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
               "legendFormat": ".9",
               "refId": "B"
             },
             {
-              "expr": "histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
               "legendFormat": ".5",
               "refId": "C"
             }
@@ -673,17 +672,17 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
              "legendFormat": ".99",
              "refId": "A"
            },
            {
-              "expr": "histogram_quantile(0.9, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
              "legendFormat": ".9",
              "refId": "B"
            },
            {
-              "expr": "histogram_quantile(0.5, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
              "legendFormat": ".5",
              "refId": "C"
            }
@@ -779,7 +778,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "legendFormat": "{{route}}",
               "refId": "A"
@@ -877,18 +876,18 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
               "legendFormat": ".99",
               "refId": "A"
             },
             {
-              "expr": "histogram_quantile(0.9, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
               "hide": false,
               "legendFormat": ".9",
               "refId": "B"
             },
             {
-              "expr": "histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
               "hide": false,
               "legendFormat": ".5",
               "refId": "C"
@@ -985,7 +984,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "legendFormat": "{{route}}",
               "refId": "A"
@@ -1085,17 +1084,17 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
+              "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
               "legendFormat": "{{route}}-.99",
               "refId": "A"
             },
             {
-              "expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
+              "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
               "legendFormat": "{{route}}-.9",
               "refId": "B"
             },
             {
-              "expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
+              "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
               "legendFormat": "{{route}}-.5",
               "refId": "C"
             }
@@ -1191,17 +1190,17 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".99-{{route}}",
               "refId": "A"
             },
             {
-              "expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".9-{{route}}",
               "refId": "B"
             },
             {
-              "expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".5-{{route}}",
               "refId": "C"
             }
@@ -1297,7 +1296,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[5m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "legendFormat": "{{route}}",
               "refId": "A"
@@ -1396,17 +1395,17 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".99-{{route}}",
               "refId": "A"
             },
             {
-              "expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".9-{{route}}",
               "refId": "B"
             },
             {
-              "expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
               "legendFormat": ".5-{{route}}",
               "refId": "C"
             }
@@ -1502,7 +1501,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "legendFormat": "{{route}}",
               "refId": "A"
@@ -2049,7 +2048,7 @@
           "panels": [ ],
           "targets": [
             {
-              "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"",
+              "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} |= \"level=error\"",
               "refId": "A"
             }
           ],
@@ -2100,7 +2099,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "intervalFactor": 1,
               "legendFormat": "{{route}}",
@@ -2190,9 +2189,9 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (ingester)",
+              "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
               "intervalFactor": 1,
-              "legendFormat": "{{ingester}}",
+              "legendFormat": "{{pod}}",
               "refId": "A"
             }
           ],
@@ -3256,7 +3255,7 @@
           "panels": [ ],
           "targets": [
             {
-              "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"",
+              "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} |= \"level=error\"",
               "refId": "A"
             }
           ],
@@ -3307,7 +3306,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[1m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[1m])) by (route)",
+              "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0",
               "interval": "",
               "intervalFactor": 1,
               "legendFormat": "{{route}}",
diff --git a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json
index 6fa166f..e54223c 100644
--- a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json
+++ b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json
@@ -53,6 +53,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -72,6 +77,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
               "format": "time_series",
@@ -142,6 +155,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -161,6 +179,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)",
               "format": "time_series",
@@ -509,83 +535,6 @@
               "show": false
             }
           ]
-        },
-        {
-          "aliasColors": { },
-          "bars": false,
-          "dashLength": 10,
-          "dashes": false,
-          "datasource": "$datasource",
-          "fill": 1,
-          "gridPos": { },
-          "id": 7,
-          "legend": {
-            "avg": false,
-            "current": false,
-            "max": false,
-            "min": false,
-            "show": true,
-            "total": false,
-            "values": false
-          },
-          "lines": true,
-          "linewidth": 1,
-          "links": [ ],
-          "nullPointMode": "null as zero",
-          "percentage": false,
-          "pointradius": 5,
-          "points": false,
-          "renderer": "flot",
-          "seriesOverrides": [ ],
-          "spaceLength": 10,
-          "span": 6,
-          "stack": false,
-          "steppedLine": false,
-          "targets": [
-            {
-              "expr": "loki_boltdb_shipper_query_readiness_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
-              "format": "time_series",
-              "intervalFactor": 2,
-              "legendFormat": "duration",
-              "legendLink": null,
-              "step": 10
-            }
-          ],
-          "thresholds": [ ],
-          "timeFrom": null,
-          "timeShift": null,
-          "title": "Query Readiness Duration",
-          "tooltip": {
-            "shared": true,
-            "sort": 2,
-            "value_type": "individual"
-          },
-          "type": "graph",
-          "xaxis": {
-            "buckets": null,
-            "mode": "time",
-            "name": null,
-            "show": true,
-            "values": [ ]
-          },
-          "yaxes": [
-            {
-              "format": "s",
-              "label": null,
-              "logBase": 1,
-              "max": null,
-              "min": 0,
-              "show": true
-            },
-            {
-              "format": "short",
-              "label": null,
-              "logBase": 1,
-              "max": null,
-              "min": null,
-              "show": false
-            }
-          ]
         }
       ],
       "repeat": null,
@@ -607,7 +556,7 @@
           "dashes": false,
          "datasource": "$datasource",
          "fill": 1,
-          "id": 8,
+          "id": 7,
          "legend": {
            "avg": false,
            "current": false,
@@ -626,6 +575,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -645,6 +599,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
               "format": "time_series",
@@ -695,7 +657,7 @@
           "dashes": false,
           "datasource": "$datasource",
           "fill": 1,
-          "id": 9,
+          "id": 8,
           "legend": {
             "avg": false,
             "current": false,
@@ -714,6 +676,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -733,6 +700,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)",
               "format": "time_series",
@@ -783,7 +758,7 @@
           "dashes": false,
           "datasource": "$datasource",
           "fill": 1,
-          "id": 10,
+          "id": 9,
           "legend": {
             "avg": false,
             "current": false,
diff --git a/charts/meta-monitoring/src/dashboards/loki-reads.json b/charts/meta-monitoring/src/dashboards/loki-reads.json
index 6cae30f..cbec8fd 100644
--- a/charts/meta-monitoring/src/dashboards/loki-reads.json
+++ b/charts/meta-monitoring/src/dashboards/loki-reads.json
@@ -142,7 +142,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "{{ route }} 99th Percentile",
@@ -150,7 +150,7 @@
               "step": 10
             },
             {
-              "expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "{{ route }} 50th Percentile",
@@ -158,7 +158,7 @@
               "step": 10
             },
             {
-              "expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ",
+              "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "{{ route }} Average",
diff --git a/charts/meta-monitoring/src/dashboards/loki-retention.json b/charts/meta-monitoring/src/dashboards/loki-retention.json
index 7fc99ec..87075c0 100644
--- a/charts/meta-monitoring/src/dashboards/loki-retention.json
+++ b/charts/meta-monitoring/src/dashboards/loki-retention.json
@@ -52,6 +52,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -71,6 +76,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
               "format": "time_series",
@@ -140,6 +153,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -159,6 +177,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)",
               "format": "time_series",
@@ -1534,4 +1560,4 @@
   "title": "Loki / Retention",
   "uid": "retention",
   "version": 0
-}
\ No newline at end of file
+}
diff --git a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json b/charts/meta-monitoring/src/dashboards/loki-writes-resources.json
index 1b68bd3..2a5b26f 100644
--- a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json
+++ b/charts/meta-monitoring/src/dashboards/loki-writes-resources.json
@@ -128,6 +128,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -147,6 +152,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
               "format": "time_series",
@@ -217,6 +230,11 @@
           "points": false,
           "renderer": "flot",
           "seriesOverrides": [
+            {
+              "alias": "request",
+              "color": "#FFC000",
+              "fill": 0
+            },
             {
               "alias": "limit",
               "color": "#E02F44",
@@ -236,6 +254,14 @@
               "legendLink": null,
               "step": 10
             },
+            {
+              "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "request",
+              "legendLink": null,
+              "step": 10
+            },
             {
               "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)",
               "format": "time_series",
diff --git a/charts/meta-monitoring/src/dashboards/loki-writes.json b/charts/meta-monitoring/src/dashboards/loki-writes.json
index ebaf33e..17d54cf 100644
--- a/charts/meta-monitoring/src/dashboards/loki-writes.json
+++ b/charts/meta-monitoring/src/dashboards/loki-writes.json
@@ -142,7 +142,7 @@
           "steppedLine": false,
           "targets": [
             {
-              "expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "99th Percentile",
@@ -150,7 +150,7 @@
               "step": 10
             },
             {
-              "expr": "histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
+              "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "50th Percentile",
@@ -158,7 +158,7 @@
               "step": 10
             },
             {
-              "expr": "1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})",
+              "expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "Average",