From d552651a1049971a1aa001ac2633b860779e0bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Mon, 1 Nov 2021 17:34:32 -0300 Subject: [PATCH 1/7] Add queryFrontend selector, start dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- .bingo/prometheus.mod | 41 ++++++++++++++++++++++- mixin/config.libsonnet | 4 +++ mixin/dashboards/query_frontend.libsonnet | 21 ++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 mixin/dashboards/query_frontend.libsonnet diff --git a/.bingo/prometheus.mod b/.bingo/prometheus.mod index 5e496c7de9..4040f753f4 100644 --- a/.bingo/prometheus.mod +++ b/.bingo/prometheus.mod @@ -20,4 +20,43 @@ replace ( k8s.io/klog => github.com/simonpasquier/klog-gokit v0.1.0 ) -require github.com/prometheus/prometheus v2.4.3+incompatible // cmd/prometheus +require ( + github.com/Azure/azure-sdk-for-go v0.0.0-00010101000000-000000000000 // indirect + github.com/Azure/go-autorest v0.0.0-00010101000000-000000000000 // indirect + github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect + github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a // indirect + github.com/aws/aws-sdk-go v1.41.15 // indirect + github.com/cockroachdb/cmux v0.0.0-00010101000000-000000000000 // indirect + github.com/cockroachdb/cockroach v0.0.0-00010101000000-000000000000 // indirect + github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect + github.com/go-kit/kit v0.12.0 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/googleapis/gnostic v0.0.0-00010101000000-000000000000 // indirect + github.com/gophercloud/gophercloud v0.0.0-00010101000000-000000000000 // indirect + github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect + github.com/hashicorp/consul/api v1.11.0 // indirect + github.com/jpillora/backoff v1.0.0 // indirect + github.com/julienschmidt/httprouter v1.3.0 // indirect + github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect + github.com/oklog/oklog v0.3.2 // indirect + github.com/oklog/run v1.1.0 // indirect + github.com/oklog/ulid v1.3.1 // indirect + github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect + github.com/peterbourgon/diskv v2.0.1+incompatible // indirect + github.com/prometheus/prometheus v2.4.3+incompatible // cmd/prometheus + github.com/prometheus/tsdb v0.0.0-00010101000000-000000000000 // indirect + github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 // indirect + github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect + github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect + golang.org/x/net v0.0.0-20211029224645-99673261e6eb // indirect + golang.org/x/oauth2 v0.0.0-20211028175245-ba495a64dcb5 // indirect + google.golang.org/api v0.60.0 // indirect + google.golang.org/genproto v0.0.0-20211101144312-62acf1d99145 // indirect + google.golang.org/grpc v1.41.0 // indirect + gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect + gopkg.in/fsnotify/fsnotify.v1 v1.4.7 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/api v0.0.0-00010101000000-000000000000 // indirect + k8s.io/apimachinery v0.0.0-00010101000000-000000000000 // indirect + k8s.io/client-go v0.0.0-00010101000000-000000000000 // indirect +) diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index e4d415d5ef..32d1d82e76 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -28,6 +28,10 @@ selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, + queryFrontend+:: { + selector: 'job=~".*thanos-query-frontend.*"', + title: '%(prefix)sQueryFrontend' % $.dashboard.prefix, + }, store+:: { selector: 'job=~".*thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet new file mode 100644 index 0000000000..053b3e8155 --- /dev/null +++ b/mixin/dashboards/query_frontend.libsonnet @@ -0,0 +1,21 @@ +local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; + +{ + local thanos = self, + queryFrontend+:: { + selector: error 'must provide selector for Thanos Query Frontend dashboard', + title: error 'must provide title for Thanos Query Frontend dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job=~"$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, + }, + grafanaDashboards+:: { + [if thanos.queryFrontend != null then 'query_frontend.json']: + g.dashboard(thanos.queryFrontend.title) + .addRow( + g.resourceUtilizationRow(thanos.queryFrontend.dashboard.selector, thanos.queryFrontend.dashboard.dimensions) + ), + }, +} From 44e8f6af9db892be16c16f2db2323d26a0110708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Mon, 8 Nov 2021 17:22:52 -0300 Subject: [PATCH 2/7] Fix naming, generate query_frontend.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- examples/dashboards/query_frontend.json | 412 ++++++++++++++++++++++ mixin/config.libsonnet | 2 +- mixin/dashboards/dashboards.libsonnet | 1 + mixin/dashboards/query_frontend.libsonnet | 21 +- 4 files changed, 431 insertions(+), 5 deletions(-) create mode 100644 examples/dashboards/query_frontend.json diff --git a/examples/dashboards/query_frontend.json b/examples/dashboards/query_frontend.json new file mode 100644 index 0000000000..b16f0dc0c2 --- /dev/null +++ b/examples/dashboards/query_frontend.json @@ -0,0 +1,412 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of requests against /query_frontend for the given time.", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, handler, code) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{handler}} {{code}}", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows ratio of errors compared to the the total number of handled requests against /query_frontend.", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows how long has it taken to handle requests in quantiles.", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + }, + { + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + }, + { + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend API", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "thanos-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": { + "text": "all", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": false, + "name": "job", + "options": [ ], + "query": "label_values(up{job=~\".*thanos-query-frontend.*\"}, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Thanos / QueryFrontend", + "uid": "303c4e660a475c4c8cf6aee97da3a24a", + "version": 0 +} diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index 32d1d82e76..4ac843ae77 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -28,7 +28,7 @@ selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, - queryFrontend+:: { + query_frontend+:: { selector: 'job=~".*thanos-query-frontend.*"', title: '%(prefix)sQueryFrontend' % $.dashboard.prefix, }, diff --git a/mixin/dashboards/dashboards.libsonnet b/mixin/dashboards/dashboards.libsonnet index 5bd99093f5..d35bbcd843 100644 --- a/mixin/dashboards/dashboards.libsonnet +++ b/mixin/dashboards/dashboards.libsonnet @@ -1,4 +1,5 @@ (import 'query.libsonnet') + +(import 'query_frontend.libsonnet') + (import 'store.libsonnet') + (import 'sidecar.libsonnet') + (import 'receive.libsonnet') + diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet index 053b3e8155..db190a7d1b 100644 --- a/mixin/dashboards/query_frontend.libsonnet +++ b/mixin/dashboards/query_frontend.libsonnet @@ -3,7 +3,7 @@ local utils = import '../lib/utils.libsonnet'; { local thanos = self, - queryFrontend+:: { + query_frontend+:: { selector: error 'must provide selector for Thanos Query Frontend dashboard', title: error 'must provide title for Thanos Query Frontend dashboard', dashboard:: { @@ -12,10 +12,23 @@ local utils = import '../lib/utils.libsonnet'; }, }, grafanaDashboards+:: { - [if thanos.queryFrontend != null then 'query_frontend.json']: - g.dashboard(thanos.queryFrontend.title) + [if thanos.query_frontend != null then 'query_frontend.json']: + local queryFrontendHandlerSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'handler="query-frontend"']); + g.dashboard(thanos.query_frontend.title) .addRow( - g.resourceUtilizationRow(thanos.queryFrontend.dashboard.selector, thanos.queryFrontend.dashboard.dimensions) + g.row('Query Frontend API') + .addPanel( + g.panel('Rate', 'Shows rate of requests against /query_frontend for the given time.') + + g.httpQpsPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against /query_frontend.') + + g.httpErrPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + + g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + ) ), }, } From 0a2d0410eed3feea3cb26b47d281832b34526d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Tue, 9 Nov 2021 16:54:34 -0300 Subject: [PATCH 3/7] Add resources row MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- examples/dashboards/dashboards.md | 1 + examples/dashboards/query_frontend.json | 282 +++++++++++++++++++++- mixin/config.libsonnet | 2 +- mixin/dashboards/query_frontend.libsonnet | 3 + 4 files changed, 286 insertions(+), 2 deletions(-) diff --git a/examples/dashboards/dashboards.md b/examples/dashboards/dashboards.md index 4fae5a1a1c..766ffcf2df 100644 --- a/examples/dashboards/dashboards.md +++ b/examples/dashboards/dashboards.md @@ -5,6 +5,7 @@ There exists Grafana dashboards for each component (not all of them complete) ta - [Thanos Overview](overview.json) - [Thanos Compact](compact.json) - [Thanos Querier](query.json) +- [Thanos Query Frontend](query_frontend.json) - [Thanos Store](store.json) - [Thanos Receiver](receive.json) - [Thanos Sidecar](sidecar.json) diff --git a/examples/dashboards/query_frontend.json b/examples/dashboards/query_frontend.json index b16f0dc0c2..146c55478b 100644 --- a/examples/dashboards/query_frontend.json +++ b/examples/dashboards/query_frontend.json @@ -313,6 +313,286 @@ "showTitle": true, "title": "Query Frontend API", "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc all {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_heap_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate all {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "rate(go_memstats_heap_alloc_bytes{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_stack_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_heap_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse stack {{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Used", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_gc_duration_seconds{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{quantile}} {{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GC Time Quantiles", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Resources", + "titleSize": "h6" } ], "schemaVersion": 14, @@ -406,7 +686,7 @@ ] }, "timezone": "UTC", - "title": "Thanos / QueryFrontend", + "title": "Thanos / Query Frontend", "uid": "303c4e660a475c4c8cf6aee97da3a24a", "version": 0 } diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index 4ac843ae77..962c351356 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -30,7 +30,7 @@ }, query_frontend+:: { selector: 'job=~".*thanos-query-frontend.*"', - title: '%(prefix)sQueryFrontend' % $.dashboard.prefix, + title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, }, store+:: { selector: 'job=~".*thanos-store.*"', diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet index db190a7d1b..d2a2b62fbe 100644 --- a/mixin/dashboards/query_frontend.libsonnet +++ b/mixin/dashboards/query_frontend.libsonnet @@ -29,6 +29,9 @@ local utils = import '../lib/utils.libsonnet'; g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) ) + ) + .addRow( + g.resourceUtilizationRow(thanos.query_frontend.dashboard.selector, thanos.query_frontend.dashboard.dimensions) ), }, } From 8e73e1f20c1de2ee9b782f3ccb79631dcd5106a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Thu, 11 Nov 2021 09:49:41 -0300 Subject: [PATCH 4/7] Add cache row, fe queries panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- examples/dashboards/query_frontend.json | 447 +++++++++++++++++++++- mixin/dashboards/query_frontend.libsonnet | 49 ++- 2 files changed, 483 insertions(+), 13 deletions(-) diff --git a/examples/dashboards/query_frontend.json b/examples/dashboards/query_frontend.json index 146c55478b..644555b4f8 100644 --- a/examples/dashboards/query_frontend.json +++ b/examples/dashboards/query_frontend.json @@ -19,7 +19,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "Shows rate of requests against /query_frontend for the given time.", + "description": "Shows rate of requests against Query Frontend for the given time.", "fill": 10, "id": 1, "legend": { @@ -62,7 +62,7 @@ } ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": true, "steppedLine": false, "targets": [ @@ -77,7 +77,104 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Rate", + "title": "Rate of requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of queries passing through Query Frontend", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, handler, code) (rate(thanos_query_frontend_queries_total{job=~\"$job\", op=\"query_range\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{handler}} {{code}}", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of queries", "tooltip": { "shared": false, "sort": 0, @@ -118,9 +215,9 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "Shows ratio of errors compared to the the total number of handled requests against /query_frontend.", + "description": "Shows ratio of errors compared to the the total number of handled requests against Query Frontend.", "fill": 10, - "id": 2, + "id": 3, "legend": { "avg": false, "current": false, @@ -140,7 +237,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": true, "steppedLine": false, "targets": [ @@ -196,7 +293,7 @@ "datasource": "$datasource", "description": "Shows how long has it taken to handle requests in quantiles.", "fill": 1, - "id": 3, + "id": 4, "legend": { "avg": false, "current": false, @@ -235,7 +332,7 @@ } ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -314,6 +411,334 @@ "title": "Query Frontend API", "titleSize": "h6" }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show rate of cache requests.", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_request_duration_seconds_count{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show rate of Querier cache gets vs misses.", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(querier_cache_gets_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache gets - {{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job, tripperware) (rate(querier_cache_misses_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache misses - {{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Querier cache gets vs misses", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of cortex fetched keys.", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_fetched_keys{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Cortex fetched keys", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of cortex cache hits.", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_hits{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Cortex cache hits", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache Operations", + "titleSize": "h6" + }, { "collapse": true, "height": "250px", @@ -325,7 +750,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 9, "legend": { "avg": false, "current": false, @@ -441,7 +866,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "id": 10, "legend": { "avg": false, "current": false, @@ -517,7 +942,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "id": 11, "legend": { "avg": false, "current": false, diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet index d2a2b62fbe..3c3728cb83 100644 --- a/mixin/dashboards/query_frontend.libsonnet +++ b/mixin/dashboards/query_frontend.libsonnet @@ -14,15 +14,21 @@ local utils = import '../lib/utils.libsonnet'; grafanaDashboards+:: { [if thanos.query_frontend != null then 'query_frontend.json']: local queryFrontendHandlerSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'handler="query-frontend"']); + local queryFrontendTripperwareSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'tripperware="query_range"']); + local queryFrontendOpSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'op="query_range"']); g.dashboard(thanos.query_frontend.title) .addRow( g.row('Query Frontend API') .addPanel( - g.panel('Rate', 'Shows rate of requests against /query_frontend for the given time.') + + g.panel('Rate of requests', 'Shows rate of requests against Query Frontend for the given time.') + g.httpQpsPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) ) .addPanel( - g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against /query_frontend.') + + g.panel('Rate of queries', 'Shows rate of queries passing through Query Frontend') + + g.httpQpsPanel('thanos_query_frontend_queries_total', queryFrontendOpSelector, thanos.query_frontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against Query Frontend.') + g.httpErrPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) ) .addPanel( @@ -30,6 +36,45 @@ local utils = import '../lib/utils.libsonnet'; g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) ) ) + .addRow( + g.row('Cache Operations') + .addPanel( + g.panel('Requests', 'Show rate of cache requests.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_request_duration_seconds_count{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Querier cache gets vs misses', 'Show rate of Querier cache gets vs misses.') + + g.queryPanel( + 'sum by (%s) (rate(querier_cache_gets_total{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'Cache gets - {{job}} {{tripperware}}', + ) + + g.queryPanel( + 'sum by (%s) (rate(querier_cache_misses_total{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'Cache misses - {{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Cortex fetched keys', 'Shows rate of cortex fetched keys.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_fetched_keys{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Cortex cache hits', 'Shows rate of cortex cache hits.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_hits{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + ) .addRow( g.resourceUtilizationRow(thanos.query_frontend.dashboard.selector, thanos.query_frontend.dashboard.dimensions) ), From 3008c9424c284c2081adcabf3b5ab06fbd0e0318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Thu, 11 Nov 2021 10:04:51 -0300 Subject: [PATCH 5/7] Update mixin README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- mixin/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mixin/README.md b/mixin/README.md index baef01946c..03758eb256 100644 --- a/mixin/README.md +++ b/mixin/README.md @@ -88,6 +88,10 @@ This project is intended to be used as a library. You can extend and customize d selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, + query_frontend+:: { + selector: 'job=~".*thanos-query-frontend.*"', + title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, + }, store+:: { selector: 'job=~".*thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, From 429eb66d7696844cfadf37f4918acc69aedbeedb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Fri, 12 Nov 2021 10:16:20 -0300 Subject: [PATCH 6/7] Solve conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- .bingo/prometheus.mod | 41 +---------------------------------------- CHANGELOG.md | 1 + 2 files changed, 2 insertions(+), 40 deletions(-) diff --git a/.bingo/prometheus.mod b/.bingo/prometheus.mod index 4040f753f4..5e496c7de9 100644 --- a/.bingo/prometheus.mod +++ b/.bingo/prometheus.mod @@ -20,43 +20,4 @@ replace ( k8s.io/klog => github.com/simonpasquier/klog-gokit v0.1.0 ) -require ( - github.com/Azure/azure-sdk-for-go v0.0.0-00010101000000-000000000000 // indirect - github.com/Azure/go-autorest v0.0.0-00010101000000-000000000000 // indirect - github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect - github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a // indirect - github.com/aws/aws-sdk-go v1.41.15 // indirect - github.com/cockroachdb/cmux v0.0.0-00010101000000-000000000000 // indirect - github.com/cockroachdb/cockroach v0.0.0-00010101000000-000000000000 // indirect - github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect - github.com/go-kit/kit v0.12.0 // indirect - github.com/golang/snappy v0.0.4 // indirect - github.com/googleapis/gnostic v0.0.0-00010101000000-000000000000 // indirect - github.com/gophercloud/gophercloud v0.0.0-00010101000000-000000000000 // indirect - github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect - github.com/hashicorp/consul/api v1.11.0 // indirect - github.com/jpillora/backoff v1.0.0 // indirect - github.com/julienschmidt/httprouter v1.3.0 // indirect - github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect - github.com/oklog/oklog v0.3.2 // indirect - github.com/oklog/run v1.1.0 // indirect - github.com/oklog/ulid v1.3.1 // indirect - github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect - github.com/peterbourgon/diskv v2.0.1+incompatible // indirect - github.com/prometheus/prometheus v2.4.3+incompatible // cmd/prometheus - github.com/prometheus/tsdb v0.0.0-00010101000000-000000000000 // indirect - github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 // indirect - github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect - github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect - golang.org/x/net v0.0.0-20211029224645-99673261e6eb // indirect - golang.org/x/oauth2 v0.0.0-20211028175245-ba495a64dcb5 // indirect - google.golang.org/api v0.60.0 // indirect - google.golang.org/genproto v0.0.0-20211101144312-62acf1d99145 // indirect - google.golang.org/grpc v1.41.0 // indirect - gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect - gopkg.in/fsnotify/fsnotify.v1 v1.4.7 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - k8s.io/api v0.0.0-00010101000000-000000000000 // indirect - k8s.io/apimachinery v0.0.0-00010101000000-000000000000 // indirect - k8s.io/client-go v0.0.0-00010101000000-000000000000 // indirect -) +require github.com/prometheus/prometheus v2.4.3+incompatible // cmd/prometheus diff --git a/CHANGELOG.md b/CHANGELOG.md index 35869f0266..191d3a8e7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4731](https://github.com/thanos-io/thanos/pull/4731) Rule: add stateless mode to ruler according to https://thanos.io/tip/proposals-accepted/202005-scalable-rule-storage.md/. Continue https://github.com/thanos-io/thanos/pull/4250. - [#4612](https://github.com/thanos-io/thanos/pull/4612) Sidecar: add `--prometheus.http-client` and `--prometheus.http-client-file` flag for sidecar to connect Prometheus with basic auth or TLS. - [#4848](https://github.com/thanos-io/thanos/pull/4848) Compactor: added Prometheus metric for tracking the progress of retention. +- [#4856](https://github.com/thanos-io/thanos/pull/4856) Mixin: Add Query Frontend Grafana dashboard. ### Fixed From ff226e249d069772a627193121cb864a4893f3f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Fri, 12 Nov 2021 10:06:44 -0300 Subject: [PATCH 7/7] Change to queryFrontend instead of query_frontend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- CHANGELOG.md | 1 - examples/dashboards/dashboards.md | 2 +- ...query_frontend.json => queryFrontend.json} | 2 +- mixin/README.md | 2 +- mixin/config.libsonnet | 2 +- mixin/dashboards/query_frontend.libsonnet | 32 +++++++++---------- 6 files changed, 20 insertions(+), 21 deletions(-) rename examples/dashboards/{query_frontend.json => queryFrontend.json} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 191d3a8e7e..a0556f5078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,6 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4576](https://github.com/thanos-io/thanos/pull/4576) UI: add filter compaction level to the Block UI. - [#4731](https://github.com/thanos-io/thanos/pull/4731) Rule: add stateless mode to ruler according to https://thanos.io/tip/proposals-accepted/202005-scalable-rule-storage.md/. Continue https://github.com/thanos-io/thanos/pull/4250. - [#4612](https://github.com/thanos-io/thanos/pull/4612) Sidecar: add `--prometheus.http-client` and `--prometheus.http-client-file` flag for sidecar to connect Prometheus with basic auth or TLS. -- [#4848](https://github.com/thanos-io/thanos/pull/4848) Compactor: added Prometheus metric for tracking the progress of retention. - [#4856](https://github.com/thanos-io/thanos/pull/4856) Mixin: Add Query Frontend Grafana dashboard. ### Fixed diff --git a/examples/dashboards/dashboards.md b/examples/dashboards/dashboards.md index 766ffcf2df..4cff60678d 100644 --- a/examples/dashboards/dashboards.md +++ b/examples/dashboards/dashboards.md @@ -5,7 +5,7 @@ There exists Grafana dashboards for each component (not all of them complete) ta - [Thanos Overview](overview.json) - [Thanos Compact](compact.json) - [Thanos Querier](query.json) -- [Thanos Query Frontend](query_frontend.json) +- [Thanos Query Frontend](queryFrontend.json) - [Thanos Store](store.json) - [Thanos Receiver](receive.json) - [Thanos Sidecar](sidecar.json) diff --git a/examples/dashboards/query_frontend.json b/examples/dashboards/queryFrontend.json similarity index 99% rename from examples/dashboards/query_frontend.json rename to examples/dashboards/queryFrontend.json index 644555b4f8..07b4b33346 100644 --- a/examples/dashboards/query_frontend.json +++ b/examples/dashboards/queryFrontend.json @@ -1112,6 +1112,6 @@ }, "timezone": "UTC", "title": "Thanos / Query Frontend", - "uid": "303c4e660a475c4c8cf6aee97da3a24a", + "uid": "9bc9f8bb21d4d18193c3fe772b36c306", "version": 0 } diff --git a/mixin/README.md b/mixin/README.md index 03758eb256..1ecedabb95 100644 --- a/mixin/README.md +++ b/mixin/README.md @@ -88,7 +88,7 @@ This project is intended to be used as a library. You can extend and customize d selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, - query_frontend+:: { + queryFrontend+:: { selector: 'job=~".*thanos-query-frontend.*"', title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, }, diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index 962c351356..55e4e9cb17 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -28,7 +28,7 @@ selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, - query_frontend+:: { + queryFrontend+:: { selector: 'job=~".*thanos-query-frontend.*"', title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, }, diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet index 3c3728cb83..136f7405f4 100644 --- a/mixin/dashboards/query_frontend.libsonnet +++ b/mixin/dashboards/query_frontend.libsonnet @@ -3,7 +3,7 @@ local utils = import '../lib/utils.libsonnet'; { local thanos = self, - query_frontend+:: { + queryFrontend+:: { selector: error 'must provide selector for Thanos Query Frontend dashboard', title: error 'must provide title for Thanos Query Frontend dashboard', dashboard:: { @@ -12,28 +12,28 @@ local utils = import '../lib/utils.libsonnet'; }, }, grafanaDashboards+:: { - [if thanos.query_frontend != null then 'query_frontend.json']: - local queryFrontendHandlerSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'handler="query-frontend"']); - local queryFrontendTripperwareSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'tripperware="query_range"']); - local queryFrontendOpSelector = utils.joinLabels([thanos.query_frontend.dashboard.selector, 'op="query_range"']); - g.dashboard(thanos.query_frontend.title) + [if thanos.queryFrontend != null then 'queryFrontend.json']: + local queryFrontendHandlerSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'handler="query-frontend"']); + local queryFrontendTripperwareSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'tripperware="query_range"']); + local queryFrontendOpSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'op="query_range"']); + g.dashboard(thanos.queryFrontend.title) .addRow( g.row('Query Frontend API') .addPanel( g.panel('Rate of requests', 'Shows rate of requests against Query Frontend for the given time.') + - g.httpQpsPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + g.httpQpsPanel('http_requests_total', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) ) .addPanel( g.panel('Rate of queries', 'Shows rate of queries passing through Query Frontend') + - g.httpQpsPanel('thanos_query_frontend_queries_total', queryFrontendOpSelector, thanos.query_frontend.dashboard.dimensions) + g.httpQpsPanel('thanos_query_frontend_queries_total', queryFrontendOpSelector, thanos.queryFrontend.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against Query Frontend.') + - g.httpErrPanel('http_requests_total', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + g.httpErrPanel('http_requests_total', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + - g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.query_frontend.dashboard.dimensions) + g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) ) ) .addRow( @@ -41,7 +41,7 @@ local utils = import '../lib/utils.libsonnet'; .addPanel( g.panel('Requests', 'Show rate of cache requests.') + g.queryPanel( - 'sum by (%s) (rate(cortex_cache_request_duration_seconds_count{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'sum by (%s) (rate(cortex_cache_request_duration_seconds_count{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], '{{job}} {{tripperware}}', ) + g.stack @@ -49,11 +49,11 @@ local utils = import '../lib/utils.libsonnet'; .addPanel( g.panel('Querier cache gets vs misses', 'Show rate of Querier cache gets vs misses.') + g.queryPanel( - 'sum by (%s) (rate(querier_cache_gets_total{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'sum by (%s) (rate(querier_cache_gets_total{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], 'Cache gets - {{job}} {{tripperware}}', ) + g.queryPanel( - 'sum by (%s) (rate(querier_cache_misses_total{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'sum by (%s) (rate(querier_cache_misses_total{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], 'Cache misses - {{job}} {{tripperware}}', ) + g.stack @@ -61,7 +61,7 @@ local utils = import '../lib/utils.libsonnet'; .addPanel( g.panel('Cortex fetched keys', 'Shows rate of cortex fetched keys.') + g.queryPanel( - 'sum by (%s) (rate(cortex_cache_fetched_keys{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'sum by (%s) (rate(cortex_cache_fetched_keys{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], '{{job}} {{tripperware}}', ) + g.stack @@ -69,14 +69,14 @@ local utils = import '../lib/utils.libsonnet'; .addPanel( g.panel('Cortex cache hits', 'Shows rate of cortex cache hits.') + g.queryPanel( - 'sum by (%s) (rate(cortex_cache_hits{%s}[$interval]))' % [utils.joinLabels([thanos.query_frontend.dashboard.dimensions, 'tripperware']), thanos.query_frontend.dashboard.selector], + 'sum by (%s) (rate(cortex_cache_hits{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], '{{job}} {{tripperware}}', ) + g.stack ) ) .addRow( - g.resourceUtilizationRow(thanos.query_frontend.dashboard.selector, thanos.query_frontend.dashboard.dimensions) + g.resourceUtilizationRow(thanos.queryFrontend.dashboard.selector, thanos.queryFrontend.dashboard.dimensions) ), }, }