diff --git a/charts/meta-monitoring/Chart.lock b/charts/meta-monitoring/Chart.lock index 2284c09..9b2f17d 100644 --- a/charts/meta-monitoring/Chart.lock +++ b/charts/meta-monitoring/Chart.lock @@ -1,10 +1,25 @@ dependencies: -- name: loki +- name: alloy repository: https://grafana.github.io/helm-charts - version: 6.6.2 + version: 0.7.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 - name: alloy repository: https://grafana.github.io/helm-charts - version: 0.3.2 + version: 0.7.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 +- name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.39.0 +- name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 5.25.1 +- name: loki + repository: https://grafana.github.io/helm-charts + version: 6.6.2 - name: mimir-distributed repository: https://grafana.github.io/helm-charts version: 5.3.0 @@ -14,5 +29,5 @@ dependencies: - name: minio repository: https://charts.min.io version: 5.2.0 -digest: sha256:f62b04092f6e7e89e6a427b586e4c0cbfcd54ddbd411846770ee684cf86f822a -generated: "2024-06-03T07:02:47.330637227Z" +digest: sha256:392372cf41bae2bc475417ae7308f81f226f5c24a3557fe31a65338aad25b2f8 +generated: "2024-10-09T07:39:50.634321671-05:00" diff --git a/charts/meta-monitoring/Chart.yaml b/charts/meta-monitoring/Chart.yaml index 29ec44b..bcc9b64 100644 --- a/charts/meta-monitoring/Chart.yaml +++ b/charts/meta-monitoring/Chart.yaml @@ -18,15 +18,45 @@ version: 1.0.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.0.1" +appVersion: "0.0.2" dependencies: + +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 + alias: alloy-metrics + condition: alloy-metrics.enabled +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 + alias: alloy-logs + condition: alloy-logs.enabled +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 + alias: alloy-events + condition: alloy-events.enabled +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.7.0 + alias: alloy-traces + condition: alloy-traces.enabled + +- name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.39.0 + alias: node-exporter + condition: node-exporter.enabled +- name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 5.25.1 + alias: kube-state-metrics + condition: kube-state-metrics.enabled + - name: loki repository: https://grafana.github.io/helm-charts version: 6.6.2 condition: local.logs.enabled -- name: alloy - repository: https://grafana.github.io/helm-charts - version: 0.3.2 - name: mimir-distributed repository: https://grafana.github.io/helm-charts version: 5.3.0 diff --git a/charts/meta-monitoring/charts/alloy-0.3.2.tgz b/charts/meta-monitoring/charts/alloy-0.3.2.tgz deleted file mode 100644 index cddb68d..0000000 Binary files a/charts/meta-monitoring/charts/alloy-0.3.2.tgz and /dev/null differ diff --git a/charts/meta-monitoring/charts/alloy-0.7.0.tgz b/charts/meta-monitoring/charts/alloy-0.7.0.tgz new file mode 100644 index 0000000..ee89664 Binary files /dev/null and b/charts/meta-monitoring/charts/alloy-0.7.0.tgz differ diff --git a/charts/meta-monitoring/charts/kube-state-metrics-5.25.1.tgz b/charts/meta-monitoring/charts/kube-state-metrics-5.25.1.tgz new file mode 100644 index 0000000..6d36229 Binary files /dev/null and b/charts/meta-monitoring/charts/kube-state-metrics-5.25.1.tgz 
differ diff --git a/charts/meta-monitoring/charts/prometheus-node-exporter-4.39.0.tgz b/charts/meta-monitoring/charts/prometheus-node-exporter-4.39.0.tgz new file mode 100644 index 0000000..0750218 Binary files /dev/null and b/charts/meta-monitoring/charts/prometheus-node-exporter-4.39.0.tgz differ diff --git a/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json b/charts/meta-monitoring/src/dashboards/alloy/alloy-cluster-node.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-cluster-node.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-cluster-node.json diff --git a/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json b/charts/meta-monitoring/src/dashboards/alloy/alloy-cluster-overview.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-cluster-overview.json diff --git a/charts/meta-monitoring/src/dashboards/alloy-controller.json b/charts/meta-monitoring/src/dashboards/alloy/alloy-controller.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-controller.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-controller.json diff --git a/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json b/charts/meta-monitoring/src/dashboards/alloy/alloy-opentelemetry.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-opentelemetry.json diff --git a/charts/meta-monitoring/src/dashboards/alloy-prometheus.json b/charts/meta-monitoring/src/dashboards/alloy/alloy-prometheus.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-prometheus.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-prometheus.json diff --git a/charts/meta-monitoring/src/dashboards/alloy-resources.json 
b/charts/meta-monitoring/src/dashboards/alloy/alloy-resources.json similarity index 100% rename from charts/meta-monitoring/src/dashboards/alloy-resources.json rename to charts/meta-monitoring/src/dashboards/alloy/alloy-resources.json diff --git a/charts/meta-monitoring/src/dashboards/grafana/grafana-overview.json b/charts/meta-monitoring/src/dashboards/grafana/grafana-overview.json new file mode 100644 index 0000000..ed16a77 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/grafana/grafana-overview.json @@ -0,0 +1,534 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 3085, + "iteration": 1631554945276, + "links": [], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Firing Alerts", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": 
{ + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dashboards", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 10, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Build Info", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "branch": true, + "container": true, + "goversion": true, + "namespace": true, + "pod": true, + "revision": true + }, + "indexByName": { + "Time": 7, + "Value": 11, + "branch": 4, + "container": 8, + "edition": 2, + "goversion": 6, + "instance": 1, + "job": 0, + "namespace": 9, + "pod": 10, + "revision": 5, + "version": 3 + }, + "renameByName": {} + } + } + ], + "type": 
"table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ", + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:157", + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:158", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + 
"avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:210", + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:211", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 
30, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "dev-cortex", + "value": "dev-cortex" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": ["default/grafana"], + "value": ["default/grafana"] + }, + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "job", + "options": [], + "query": { + "query": "label_values(grafana_build_info, job)", + "refId": "Billing Admin-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(grafana_build_info, instance)", + "refId": "Billing Admin-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": ["10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"] + }, + "timezone": "", + "title": "Grafana Overview", + "uid": "6be0s85Mk", + "version": 2 +} diff --git 
a/charts/meta-monitoring/src/dashboards/loki/loki-bloom-compactor.json b/charts/meta-monitoring/src/dashboards/loki/loki-bloom-compactor.json new file mode 100644 index 0000000..c365fab --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/loki/loki-bloom-compactor.json @@ -0,0 +1,5265 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 111, + "panels": [ ], + "targets": [ ], + "title": "Overview", + "type": "row" + }, + { + "gridPos": { + "h": 8, + "w": 14, + "x": 0, + "y": 1 + }, + "id": 35, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "## About the Bloom Compactor\nThe compactor iterates through chunks and creates blooms out of them.\nThe size of the resulting blooms depends on the bloom filter settings, the tokenizer settings, the number of ring tokens per compactor and the total number of compactors.\n\nCompactors are horizontally scalable and use a ring to:\n- Shard tenants\n- Shard series fingerprints within a tenant subring.\n\nThe blooms for the series are grouped together in blocks which are flushed to object store.", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Cell-wide compaction progress. 
Should increase till completion throughout each compaction period.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"})\n/\nsum(count(loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}))", + "hide": false, + "instant": false, + "legendFormat": "avg", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.9, \n sum by (pod) (\n loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}\n 
)\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.1, \n sum by (pod) (\n loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p10", + "range": true, + "refId": "C" + } + ], + "title": "Progress", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Uncompressed size of chunks in a series VS the size of the blooms built.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ratio" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Ratio over range" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 41, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", 
+ "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Bloom", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Chunk", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Ratio", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Ratio over range", + "range": true, + "refId": "D" + } + ], + "title": "Chunks and Bloom size", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Blooms size vs uncompressed chunk size.", + "gridPos": { + "h": 7, + "w": 17, + "x": 0, + "y": 16 + }, + "id": 51, + 
"options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} |= \"level=error\" |= \"component=bloom-compactor\"", + "queryType": "range", + "refId": "B" + } + ], + "title": "Errors", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 17, + "y": 16 + }, + "id": 53, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": 
"${loki_datasource}" + }, + "editorMode": "code", + "expr": "sum(count_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} |= \"level=error\" |= \"component=bloom-compactor\" [$__auto]))", + "queryType": "range", + "refId": "A" + } + ], + "title": "Errors Rate", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 112, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 114, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}\n)", + 
"hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Progress per pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 0 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 115, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false, + "sizing": "auto" + }, + "panels": [ ], + "pluginVersion": "11.0.0-68102", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n loki_bloomcompactor_progress{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}\n)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Current Progress per pod", + "type": "gauge" + } + ], + "targets": [ ], + "title": "Progress per pod", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 56, + "panels": [ + { + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 85, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "We use tenant sharding so each compactor will process a subset of the tenants.", + "transparent": true, + "type": "text" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows the expected number of cpu cores we need to provision to build blooms as fast as we ingest data so a compaction iteration doesn't take longer than the compaction interval.\n\nWe may decide to have more to speed up compaction.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 94, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# This query shows the expected number of cpu cores we need to not fall behind\n# building blooms for data we're ingesting.\n# conceptually, the formula is:\n# (cell_bytes * space_amplification / bloom_bytes_processed_per_core)\n\n# number of replicas needed\nsum(avg_over_time(loki_cell:bytes:rate1m{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n*\n## Space 
amplification (how much data do we write compared to what we ingest?)\n(\n # rep factor\n 3 *\n sum(\n # 1 - dedupe_ratio\n 1 - \n sum(rate(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (cluster, namespace)\n /\n sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (cluster, namespace)\n )\n)\n/\n(\nsum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))\n/\nsum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))\n)", + "hide": false, + "instant": false, + "legendFormat": "Needed", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Available", + "range": true, + "refId": "A" + } + ], + "title": "Required CPUs to not lag behind", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# MB/s/core chunk data processed\nsum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}[$__rate_interval])) by (pod)\n/\nsum(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])) by (pod)", + "hide": true, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# MB/s/core chunk data processed\nsum(rate(loki_bloomcompactor_chunk_series_size_sum{cluster=~\"$cluster\", job=~\"$namespace/bloom-compactor\"}[$__rate_interval]))\n/\nsum(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + } + ], + "title": "MB/s per core", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_limits{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"bloom-compactor\"}[$__rate_interval])\n)", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "avg(\n rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "Avg", + "range": true, + "refId": "E" + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ ], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_limits{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + } + ], + "title": "CPU per pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 76, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"memory\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile (\n 0.99,\n container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}\n)", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile (\n 0.50,\n container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}\n)", + "hide": false, + "instant": false, + 
"legendFormat": "p50", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "avg (\n container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"}\n)", + "hide": false, + "instant": false, + "legendFormat": "Avg", + "range": true, + "refId": "E" + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "bloom-compactor-106" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 
600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"})", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\", resource=\"memory\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-compactor\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + } + ], + "title": "Memory per pod (workingset)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum (\n increase(\n kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[10m]\n )\n) > 0", + "instant": false, + "legendFormat": "Restarts", + "range": true, + "refId": "A" + } + ], + "title": "Container restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] 
+ }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 77, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "(\n sum by (pod) (\n increase(\n kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[10m]\n )\n )\n * on (pod) group_right\n max by (pod, reason) (\n kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}\n )\n) > 0", + "instant": false, + "legendFormat": "{{reason}} / {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Container restarts reason per pod", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Resource Usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 95, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "How many tokens each compactor is appending to blooms. Accounts for tokens that are not actually added to the blooms since they are already there. 
See the panel on the right for a drill down on the collision.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 96, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# tokens checked per pod, millions/s\nsum(rate(loki_bloom_tokens_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))\n/\nsum(count(loki_bloom_tokens_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}))\n/ 1e6", + "hide": false, + "instant": false, + "legendFormat": "Per core", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_inserts_total{cluster=\"$cluster\", namespace=\"$namespace\", 
container=\"bloom-compactor\"}[$__rate_interval])) / 1e6", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "C" + } + ], + "title": "Tokens rate (millions)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Collision type may be `false` (no collision), `cache` (found in token cache) or true (found in bloom filter).\n\nType may be either `raw` (the original ngram) or `chunk_prefixed` (the ngram with the chunk prefix)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 97, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# tokens/s by type+collision\nsum by (collision) (\n rate(loki_bloom_inserts_total{cluster=\"$cluster\", 
namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n) \n/ on () group_left\nsum (\n rate(loki_bloom_inserts_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "tokens/s by collision type", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The sizes of the blooms created by the compactor. We build one bloom per series. The more unique ngrams and chunks the series has, the bigger their blooms will be.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 98, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"editorMode": "code", + "expr": "histogram_quantile(\n 0.99, \n sum by (le) (\n rate(loki_bloom_size_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(\n 0.90, \n sum by (le) (\n rate(loki_bloom_size_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(\n 0.50, \n sum by (le) (\n rate(loki_bloom_size_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "F" + } + ], + "title": "Bloom size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "How many chunks are we indexing in the blooms. 
Either:\n- `copied` from a pre-existing bloom block, or \n- `iterated` through all its entries if processed for the first time.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 99, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# chunks indexed, by iteration or copied from a pre-existing bloom\nsum(rate(loki_bloom_chunks_indexed_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])) by (type)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Chunks indexed", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Bloom building", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 
0, + "y": 26 + }, + "id": 103, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 107, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(increase(loki_bloomcompactor_blocks_created_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Blocks", + "range": true, + "refId": "A" + } + ], + "title": "Created Blocks", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Compactors delete metas and blocks marked for deletion in the metas tombstones.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 72 + }, + "id": 106, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(increase(loki_bloomcompactor_blocks_deleted_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Blocks", + "range": true, + "refId": "A" + } + ], + "title": "Deleted Blocks", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of overlapping bloom blocks reused when creating new blocks\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, 
+ "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 79 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(increase(loki_bloomcompactor_blocks_reused_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Blocks", + "range": true, + "refId": "A" + } + ], + "title": "Blocks reused", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Blocks building", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 110, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 87 + }, + "id": 108, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(increase(loki_bloomcompactor_metas_created_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Metas", + "range": true, + "refId": "A" + } + ], + "title": "Created Metas", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Compactors delete metas and blocks marked for deletion in the metas tombstones.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 87 + }, + "id": 105, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "pluginVersion": "11.1.0-69868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(increase(loki_bloomcompactor_metas_deleted_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Metas", + "range": true, + "refId": "A" + } + ], + "title": "Deleted Metas", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Metas building", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 80, + "panels": [ + { + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 95 + }, + "id": 93, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "We use tenant sharding so each compactor will process a subset of the tenants.", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 96 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n increase(\n loki_bloomcompactor_tenants_started_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.90,\n increase(\n loki_bloomcompactor_tenants_started_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n increase(\n loki_bloomcompactor_tenants_started_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "B" + } + ], + "title": "Tenants", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 96 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n increase(\n loki_bloomcompactor_tenants_started_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "title": "Tenants per pod", + "type": "timeseries" + }, + { + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 103 + }, + "id": 86, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "Number of tenant tables processed. 
", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 104 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n increase(\n loki_bloomcompactor_tenant_table_ranges_completed_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.90,\n increase(\n loki_bloomcompactor_tenant_table_ranges_completed_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + 
"instant": false, + "legendFormat": "p90", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n increase(\n loki_bloomcompactor_tenant_table_ranges_completed_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "B" + } + ], + "title": "Tenant Tables", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 104 + }, + "id": 89, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n increase(\n loki_bloomcompactor_tenant_table_ranges_completed_total{cluster=\"$cluster\", 
namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval]\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "title": "Tenant Tables per pod", + "type": "timeseries" + }, + { + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 111 + }, + "id": 87, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "Series per compaction (includes series copied from other blocks)", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 112 + }, + "id": 81, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "editorMode": "code", + "expr": "# series checked per compaction\nhistogram_quantile(\n 0.99, \n sum by (le) (\n rate(loki_bloomcompactor_series_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# series checked per compaction\nhistogram_quantile(\n 0.9, \n sum by (le) (\n rate(loki_bloomcompactor_series_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# series checked per compaction\nhistogram_quantile(\n 0.5, \n sum by (le) (\n rate(loki_bloomcompactor_series_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + } + ], + "title": "Series", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 112 + }, + "id": 82, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n rate(loki_bloomcompactor_series_per_compaction_sum{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n /\n rate(loki_bloomcompactor_series_per_compaction_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "title": "avg series per compaction by pod", + "type": "timeseries" + }, + { + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 119 + }, + "id": 90, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "Number of bytes from chunks added to blocks during each compaction.", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 120 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# bytes added per compaction\nhistogram_quantile(\n 0.99, \n sum by (le) (\n rate(loki_bloomcompactor_bytes_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# bytes added per compaction\nhistogram_quantile(\n 0.9, \n sum by (le) (\n rate(loki_bloomcompactor_bytes_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "# bytes added per compaction\nhistogram_quantile(\n 0.5, \n sum by (le) (\n 
rate(loki_bloomcompactor_bytes_per_compaction_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + } + ], + "title": "Bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 120 + }, + "id": 92, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n rate(loki_bloomcompactor_bytes_per_compaction_sum{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n /\n rate(loki_bloomcompactor_bytes_per_compaction_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"}[$__rate_interval])\n)", + "hide": 
false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "title": "avg bytes per compaction by pod", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Data processed", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 58, + "panels": [ + { + "description": "", + "fieldConfig": { + "defaults": { }, + "overrides": [ ] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 82 + }, + "id": 47, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\nCompactors write blocks to the attached PVs before flushing them into the object store.\nThey also download chunks and index files.\n\nAfter compacting a given tenant, all the downloaded index files and chunks, as well as the already flushed blocks, are deleted.", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-69747", + "targets": [ ], + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 0.80000000000000004 + } + ] + 
}, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} \n / \n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n ) \n and \n count by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-compactor\"}\n )\n)", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.90,\n max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} \n / \n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n ) \n and \n count by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-compactor\"}\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} \n / \n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n ) \n and \n count by(persistentvolumeclaim) (\n 
kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-compactor\"}\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "C" + } + ], + "title": "Disk Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 0.80000000000000004 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 85 + }, + "id": 100, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) 
(kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-compactor\"})", + "instant": false, + "legendFormat": "{{persistentvolumeclaim}}", + "range": true, + "refId": "A" + } + ], + "title": "Disk Utilization per pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 93 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n sum by(instance, pod, device) (\n rate(node_disk_written_bytes_total[$__rate_interval])\n ) \n + ignoring(pod) group_right() \n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n 
\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.90,\n sum by(instance, pod, device) (\n rate(node_disk_written_bytes_total[$__rate_interval])\n ) \n + ignoring(pod) group_right() \n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n sum by(instance, pod, device) (\n rate(node_disk_written_bytes_total[$__rate_interval])\n ) \n + ignoring(pod) group_right() \n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "C" + } + ], + "title": "Disk Writes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 
5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 93 + }, + "id": 101, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Disk Writes per pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 100 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.99,\n sum by(instance, pod, device) (\n rate(node_disk_read_bytes_total[$__rate_interval])\n ) + ignoring(pod) group_right()\n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.90,\n sum by(instance, pod, device) (\n rate(node_disk_read_bytes_total[$__rate_interval])\n ) + ignoring(pod) group_right()\n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "quantile(\n 0.50,\n sum by(instance, pod, device) (\n 
rate(node_disk_read_bytes_total[$__rate_interval])\n ) + ignoring(pod) group_right()\n (\n label_replace(\n count by(instance, pod, device) (\n container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}\n ), \n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ) * 0\n )\n)", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "C" + } + ], + "title": "Disk Reads", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 100 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() 
(label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-compactor\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Disk Reads per pod", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Disk Usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 62, + "panels": [ + { + "description": "", + "fieldConfig": { + "defaults": { }, + "overrides": [ ] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 83 + }, + "id": 71, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Once all blocks and metas are built locally, the compactor flushes them to the object store.\n\nAfter each iteration, the compactor deletes the metas and blocks marked for deletion in the tombstones.", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-69747", + "targets": [ ], + "title": "", + "transparent": true, + "type": "text" + }, + { + "description": "", + "fieldConfig": { + "defaults": { }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 86 + }, + "id": 63, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### GCS\n", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-69747", + "targets": [ ], + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 86 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} {{status_code}}", + "range": true, + "refId": "B" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 86 + }, + "id": 64, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "F" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": 
{ }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 93 + }, + "id": 65, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### S3\n", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-69747", + "targets": [ ], + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 93 + }, + "id": 67, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} 
{{status_code}}", + "range": true, + "refId": "B" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 93 + }, + "id": 69, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", 
namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "F" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": { }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 100 + }, + "id": 66, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### Azure\nBlob Storage", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-69747", + "targets": [ ], + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 100 + }, + "id": 68, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} {{status_code}}", + "range": true, + "refId": "B" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 100 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-compactor\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "F" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Object Store", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, 
+ "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Bloom Compactor", + "uid": "bloom-compactor", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/loki/loki-bloom-gateway.json b/charts/meta-monitoring/src/dashboards/loki/loki-bloom-gateway.json new file mode 100644 index 0000000..2d5e16a --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/loki/loki-bloom-gateway.json @@ -0,0 +1,5092 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + 
"targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 73, + "panels": [ ], + "targets": [ ], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Percentage of chunks that are filtered by using bloom filters", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds", + "seriesBy": "last" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 0.5 + }, + { + "color": "yellow", + "value": 0.75 + }, + { + "color": "green", + "value": 0.90000000000000002 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": 
"sum(rate(loki_bloom_gateway_filtered_chunks_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))\n/\nsum(rate(loki_bloom_gateway_requested_chunks_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "Chunks", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(loki_bloom_gateway_filtered_series_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))\n/\nsum(rate(loki_bloom_gateway_requested_series_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Series", + "range": true, + "refId": "B" + } + ], + "title": "Filter ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Percentage of chunks that are filtered by using bloom filters", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 0.5 + }, + { + "color": "yellow", + "value": 0.75 + }, + { + "color": "green", + "value": 0.90000000000000002 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 75, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(rate(loki_bloom_gateway_filtered_chunks_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))\n/\nsum(rate(loki_bloom_gateway_requested_chunks_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))", + "instant": true, + "legendFormat": "Chunks", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(loki_bloom_gateway_filtered_series_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))\n/\nsum(rate(loki_bloom_gateway_requested_series_sum{job=\"$namespace/bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": true, + "legendFormat": "Series", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(loki_bloom_gateway_filtered_chunks_sum{job=\"$namespace/bloom-gateway\"})\n/\nsum(loki_bloom_gateway_requested_chunks_sum{job=\"$namespace/bloom-gateway\"})", + "hide": true, + "instant": true, + "legendFormat": "Chunks avg", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(loki_bloom_gateway_filtered_series_sum{job=\"$namespace/bloom-gateway\"})\n/\nsum(loki_bloom_gateway_requested_series_sum{job=\"$namespace/bloom-gateway\"})", + "hide": true, + "instant": true, + "legendFormat": "Series avg", + "range": false, + "refId": "D" + } + ], + "title": "Filter ratio", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Desired" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "kube_statefulset_status_replicas_ready{cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"bloom-gateway\"}", + "hide": false, + "instant": false, + "legendFormat": "Ready", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_status_ready{container=\"bloom-gateway\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "hide": true, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": 
"kube_statefulset_replicas{cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"bloom-gateway\"}", + "hide": false, + "instant": false, + "legendFormat": "Desired", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "kube_statefulset_status_replicas_available{cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"bloom-gateway\"}", + "hide": true, + "instant": false, + "legendFormat": "Available", + "range": true, + "refId": "C" + } + ], + "title": "Readiness", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "editorMode": "code", + "expr": "(\n max by (pod, reason) (kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"})\n * on (pod) group_left\n sum by (pod) (increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))\n) > 0", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{pod}} ({{reason}})", + "range": true, + "refId": "C" + } + ], + "title": "Container restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "gridPos": { + "h": 9, + "w": 15, + "x": 0, + "y": 7 + }, + "id": 48, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"level=error\" or \"panic:\" | logfmt", + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"level=warn\" | logfmt", + "hide": true, + "queryType": "range", + "refId": "B" + } + ], + "title": "Errors", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "symlog" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "warn" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "panic" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 7 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "sum by (level) (count_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |~ \"level=(warn|error)\" | logfmt [$__auto]))", + "legendFormat": "{{ level }}", + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "sum (count_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= 
\"panic:\" | logfmt [$__auto]))", + "hide": false, + "legendFormat": "panic", + "queryType": "range", + "refId": "B" + } + ], + "title": "Errors Rate", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 56, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", 
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_limits{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "editorMode": "code", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" 
+ }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 81, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", + "hide": true, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}) by (pod)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + } + ], + "title": "Memory (heap inuse)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 31 + }, + "id": 87, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (rate(go_gc_cycles_total_gc_cycles_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "GC rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 31 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (rate(go_gc_duration_seconds_sum{container=\"bloom-gateway\"}[$__rate_interval]))\n/\nsum by (pod) (rate(go_gc_duration_seconds_count{container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "GC duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 31 + }, + "id": 89, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(go_gc_pauses_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(go_gc_pauses_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "GC pauses", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod) (rate(node_disk_read_bytes_total[$__rate_interval]))\n+ ignoring(pod) group_right() \n(count by(instance, pod) (container_fs_reads_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}) * 0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "D" + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 85, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod) (rate(node_disk_written_bytes_total[$__rate_interval]))\n+ ignoring(pod) group_right() \n(count by(instance, pod) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}) * 0)", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "D" + } + ], + "title": "Disk writes", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Resource usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 2, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { 
+ "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code) (\n rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=\"/logproto.BloomGateway/FilterChunkRefs\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 
false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 86, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=\"/logproto.BloomGateway/FilterChunkRefs\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "QPS per Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"/logproto.BloomGateway/FilterChunkRefs\"}))", + "hide": false, + "instant": false, + "legendFormat": "{{ route }} 50th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"/logproto.BloomGateway/FilterChunkRefs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"/logproto.BloomGateway/FilterChunkRefs\"}) by (route) ", + "hide": false, + "instant": false, + "legendFormat": "{{ route }} Average", + "range": 
true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"/logproto.BloomGateway/FilterChunkRefs\"}))", + "hide": false, + "instant": false, + "legendFormat": "{{ route }} 99th percentile", + "range": true, + "refId": "D" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99,\n sum(\n rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/bloom-gateway\", route=~\"/logproto.BloomGateway/FilterChunkRefs\"}[$__rate_interval])\n ) by (pod, le)\n )\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "QPS and Latency", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 58, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (pod) (loki_bloom_gateway_queue_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"})\n/\nsum by (pod) 
(loki_bloom_gateway_queue_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"})\n", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum (loki_bloom_gateway_queue_length{cluster=\"$cluster\", namespace=\"$namespace\"})", + "hide": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (user) (loki_bloom_gateway_queue_length{cluster=\"$cluster\", namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "{{user}}", + "range": true, + "refId": "B" + } + ], + "title": "Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "For how long do pending tasks stay in the queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(loki_bloom_gateway_queue_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (le) (rate(loki_bloom_gateway_queue_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "hide": true, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(loki_bloom_gateway_queue_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum (loki_bloom_gateway_queue_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"})\n/\nsum (loki_bloom_gateway_queue_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "avg", + "range": true, + "refId": "D" + } + ], + "title": "Queue Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Inflight requests tracks all tasks both queued and in progress", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + 
}, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (quantile) (loki_bloom_gateway_inflight_tasks{cluster=\"$cluster\", namespace=\"$namespace\", quantile=\"0.99\"})", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Inflight tasks", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Task Queue", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 68, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 69, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(loki_bloom_gateway_process_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "instant": false, + "legendFormat": "{{status}}-p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_bloom_gateway_process_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "hide": false, + "instant": false, + "legendFormat": "{{status}}-p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, 
sum(rate(loki_bloom_gateway_process_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "hide": false, + "instant": false, + "legendFormat": "{{status}}-p90", + "range": true, + "refId": "C" + } + ], + "title": "Processing time for tasks (per worker iteration)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(loki_bloom_gateway_block_query_latency_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "instant": false, + 
"legendFormat": "{{status}}-p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_bloom_gateway_block_query_latency_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "hide": false, + "instant": false, + "legendFormat": "{{status}}-p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(loki_bloom_gateway_block_query_latency_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval])) by (le, status))", + "hide": false, + "instant": false, + "legendFormat": "{{status}}-p90", + "range": true, + "refId": "C" + } + ], + "title": "Block query latency (single block)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + 
"y": 12 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_gateway_tasks_dequeued_total{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "dequeued", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status) (rate(loki_bloom_gateway_tasks_processed_total{cluster=\"$cluster\",namespace=\"$namespace\",container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "processed {{status}}", + "range": true, + "refId": "B" + } + ], + "title": "Tasks dequeued/processed", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Processing", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 59, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 19, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "We cache bloom blocks in memory to prevent the gateway from hitting the object store too often", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_embeddedcache_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", cache=\"bloom-blocks-cache\", container=\"bloom-gateway\"}) by (pod)", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_bloom_blocks_cache_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}) by (pod)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Cache size (per pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "linearThreshold": 1000, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.fillOpacity", + "value": 25 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Items" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 25 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "unit", + "value": "" + }, + { + "id": "custom.axisSoftMin", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_bloom_blocks_cache_entries{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"})", + "hide": false, + "instant": false, + "legendFormat": 
"Items", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_blocks_cache_added_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Added", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_blocks_cache_evicted_total{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval])) by (reason)", + "hide": false, + "instant": false, + "legendFormat": "Evicted ({{reason}})", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_blocks_cache_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": true, + "instant": false, + "legendFormat": "Size", + "range": true, + "refId": "E" + } + ], + "title": "Cache rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "linearThreshold": 1000, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "hit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "miss" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 92, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status) (\n rate(loki_bloom_blocks_cache_fetched_total{container=\"bloom-gateway\"}[$__rate_interval])\n)\n/ ignoring(status) group_left\nsum (\n rate(loki_bloom_blocks_cache_fetched_total{container=\"bloom-gateway\"}[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Hit/Miss ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Size (.*)/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 76, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_embeddedcache_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", cache=\"bloom-blocks-cache\", container=\"bloom-gateway\"})\n/\nsum(loki_embeddedcache_entries{cluster=\"$cluster\", namespace=\"$namespace\", cache=\"bloom-blocks-cache\", container=\"bloom-gateway\"})", + "instant": false, + "legendFormat": "Size", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(loki_bloom_blocks_cache_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"})\n/\nsum(loki_bloom_blocks_cache_entries{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"})", + "hide": false, + "instant": false, + "legendFormat": "Size", + "range": true, + "refId": "B" + } + ], + "title": "Average item size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.* (blocks|metas) size/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_store_metas_fetched_sum{cluster=\"$cluster\",namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "metas fetch rate", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(loki_bloom_store_blocks_fetched_sum{cluster=\"$cluster\",namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "blocks fetch 
rate", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum(rate(loki_bloom_store_blocks_fetched_size_bytes_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "p90 blocks size", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum(rate(loki_bloom_store_metas_fetched_size_bytes_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "p90 metas size", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(1.0, sum(rate(loki_bloom_store_metas_fetched_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(loki_bloom_store_metas_fetched_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(loki_bloom_store_metas_fetched_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "G" + } + ], + "title": "Bloom Store", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Blocks 
Cache", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 60, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 61, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "title": "The gateway downloads bloom meta files and blocks from the object store.", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 15 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### GCS\n", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ]
+ }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 15 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval]))", + "instant": false, + "legendFormat": "{{operation}} {{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 15 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": 
"none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 22 + }, + "id": 62, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### S3\n", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 22 + }, + "id": 63, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval]))", + "instant": false, + "legendFormat": "{{operation}} {{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 
false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 22 + }, + "id": 64, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "C" + } + ], + "title": 
"Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 29 + }, + "id": 65, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "---\n#### Azure\nBlob Storage\n\n", + "mode": "markdown" + }, + "panels": [ ], + "pluginVersion": "11.1.0-70005", + "targets": [ ], + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 2, + "y": 29 + }, + "id": 66, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (status_code, operation) (rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", 
container=\"bloom-gateway\"} [$__rate_interval]))", + "instant": false, + "legendFormat": "{{operation}} {{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 11, + "x": 13, + "y": 29 + }, + "id": 67, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "instant": false, + "legendFormat": "{{operation}} p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, 
sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (operation, le) (rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} [$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{operation}} p50", + "range": true, + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Object Store", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 77, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 
23 + }, + "id": 78, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "topk(3, sum by (tasks) (count_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"process tasks with bounds\" | logfmt [5s])))", + "legendFormat": "{{tasks}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Process tasks with bounds", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "max", + "avg" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos":
{ + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 79, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "max(max_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"fetch blocks\" | logfmt | unwrap duration(duration) [$__auto]))", + "hide": false, + "legendFormat": "max", + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "avg(avg_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"fetch blocks\" | logfmt | unwrap duration(duration) [$__auto]))", + "hide": false, + "legendFormat": "avg", + "queryType": "range", + "refId": "B" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "avg(avg_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"request unavailable blocks in the background\" | logfmt | missing > 0 | unwrap missing [$__auto]))", + "hide": false, + "legendFormat": "avg missing", + "queryType": "range", + "refId": "C" + } + ], + "title": "Download enqueue duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type":
"linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 100 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 31 + }, + "id": 80, + "options": { + "barRadius": 0, + "barWidth": 0.96999999999999997, + "fullHighlight": false, + "groupWidth": 0.69999999999999996, + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "horizontal", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "panels": [ ], + "pluginVersion": "11.0.0-67814", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "code", + "expr": "sort_desc(topk(10, sum by (tasks) (count_over_time({cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"} |= \"process tasks with bounds\" | logfmt [$__auto]))))", + "legendFormat": "", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Tasks multiplexed", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" +
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Enqueue latency" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 82, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "panels": [ ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(loki_bloom_store_download_queue_enqueue_time_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "Enqueue latency", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(loki_bloom_store_download_queue_size_bucket{cluster=\"$cluster\", namespace=\"$namespace\", container=\"bloom-gateway\"}[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "Size", + "range": true, + "refId": "B" + } + ], + "title": "Block download queue", + "type": "timeseries" + } + ], + "targets": [ ], + "title": "Misc", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + 
"text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Bloom Gateway", + "uid": "bloom-gateway", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/loki-chunks.json b/charts/meta-monitoring/src/dashboards/loki/loki-chunks.json similarity index 89% rename from charts/meta-monitoring/src/dashboards/loki-chunks.json rename to charts/meta-monitoring/src/dashboards/loki/loki-chunks.json index a1254e8..cfe9431 100644 --- a/charts/meta-monitoring/src/dashboards/loki-chunks.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-chunks.json @@ -51,7 +51,6 @@ "overrides": [ ] }, "id": 1, - "interval": "1m", 
"links": [ ], "options": { "legend": { @@ -65,7 +64,7 @@ "span": 6, "targets": [ { - "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})", + "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"})", "format": "time_series", "legendFormat": "series", "legendLink": null @@ -99,7 +98,6 @@ "overrides": [ ] }, "id": 2, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -113,7 +111,7 @@ "span": 6, "targets": [ { - "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})", + "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"})", "format": "time_series", "legendFormat": "chunks", "legendLink": null @@ -159,7 +157,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -174,19 +171,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1", + 
"expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -238,7 +235,6 @@ "overrides": [ ] }, "id": 4, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -253,19 +249,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 
1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -329,7 +325,6 @@ "overrides": [ ] }, "id": 5, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -344,19 +339,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", 
job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -408,7 +403,6 @@ "overrides": [ ] }, "id": 6, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -422,7 +416,7 @@ "span": 6, "targets": [ { - "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Index Entries", "legendLink": null @@ -468,7 +462,6 @@ "overrides": [ ] }, "id": 7, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -482,7 +475,7 @@ "span": 6, "targets": [ { - "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}", + "expr": 
"loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -665,7 +658,6 @@ }, "fill": 10, "id": 8, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -681,7 +673,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -727,7 +719,6 @@ "overrides": [ ] }, "id": 9, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -741,7 +732,7 @@ "span": 6, "targets": [ { - "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -775,7 +766,6 @@ "overrides": [ ] }, "id": 10, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -790,7 +780,7 @@ "stack": true, "targets": [ { - "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", 
job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -847,14 +837,13 @@ "hideZeroBuckets": false, "highlightCards": true, "id": 11, - "interval": "1m", "legend": { "show": true }, "span": 12, "targets": [ { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -910,14 +899,13 @@ "hideZeroBuckets": false, "highlightCards": true, "id": 12, - "interval": "1m", "legend": { "show": true }, "span": 12, "targets": [ { - "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -980,7 +968,6 @@ "overrides": [ ] }, "id": 13, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -994,19 +981,19 @@ "span": 12, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le))", "format": "time_series", "legendFormat": "p99", "legendLink": null }, { - "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le))", "format": "time_series", "legendFormat": "p90", "legendLink": null }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le))", "format": "time_series", "legendFormat": "p50", "legendLink": null @@ -1052,7 +1039,6 @@ "overrides": [ ] }, "id": 14, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1066,19 +1052,19 @@ "span": 12, "targets": [ { - "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le))", "format": "time_series", "legendFormat": "p50", "legendLink": null }, { - "expr": 
"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) by (le))", "format": "time_series", "legendFormat": "p99", "legendLink": null }, { - "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/.*ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "avg", "legendLink": null diff --git a/charts/meta-monitoring/src/dashboards/loki-deletion.json b/charts/meta-monitoring/src/dashboards/loki/loki-deletion.json similarity index 95% rename from charts/meta-monitoring/src/dashboards/loki-deletion.json rename to charts/meta-monitoring/src/dashboards/loki/loki-deletion.json index c127724..7b048f7 100644 --- a/charts/meta-monitoring/src/dashboards/loki-deletion.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-deletion.json @@ -35,7 +35,6 @@ "fill": 1, "format": "none", "id": 1, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -111,7 +110,6 @@ "fill": 1, "format": "dtdurations", "id": 2, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -215,7 +213,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -263,7 +260,6 @@ "overrides": [ ] }, 
"id": 4, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -311,7 +307,6 @@ "overrides": [ ] }, "id": 5, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -371,7 +366,6 @@ "overrides": [ ] }, "id": 6, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -419,7 +413,6 @@ "overrides": [ ] }, "id": 7, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -467,7 +460,6 @@ "overrides": [ ] }, "id": 8, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -527,7 +519,6 @@ "overrides": [ ] }, "id": 9, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -575,7 +566,6 @@ "overrides": [ ] }, "id": 10, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -589,7 +579,7 @@ "span": 6, "targets": [ { - "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}[$__rate_interval])) by (user)", + "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval])) by (user)", "format": "time_series", "legendFormat": "{{user}}", "legendLink": null @@ -613,11 +603,10 @@ { "datasource": "$loki_datasource", "id": 11, - "interval": "1m", "span": 6, "targets": [ { - "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", + "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", "refId": "A" } ], @@ -627,11 +616,10 @@ { "datasource": "$loki_datasource", 
"id": 12, - "interval": "1m", "span": 6, "targets": [ { - "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*/compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", + "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", "refId": "A" } ], diff --git a/charts/meta-monitoring/src/dashboards/loki-logs.json b/charts/meta-monitoring/src/dashboards/loki/loki-logs.json similarity index 98% rename from charts/meta-monitoring/src/dashboards/loki-logs.json rename to charts/meta-monitoring/src/dashboards/loki/loki-logs.json index 84ad5a3..32b8e52 100644 --- a/charts/meta-monitoring/src/dashboards/loki-logs.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-logs.json @@ -38,7 +38,6 @@ }, "hiddenSeries": false, "id": 35, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -130,7 +129,6 @@ }, "hiddenSeries": false, "id": 41, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -218,7 +216,6 @@ }, "hiddenSeries": false, "id": 36, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -310,7 +307,6 @@ }, "hiddenSeries": false, "id": 40, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -402,7 +398,6 @@ }, "hiddenSeries": false, "id": 38, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -494,7 +489,6 @@ }, "hiddenSeries": false, "id": 39, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -581,7 +575,6 @@ }, "hiddenSeries": false, "id": 37, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -674,7 +667,6 @@ }, "hiddenSeries": false, "id": 42, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -767,7 +759,6 @@ }, "hiddenSeries": false, "id": 31, - "interval": "1m", "legend": { 
"avg": false, "current": false, @@ -810,7 +801,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__interval])) by (level)", + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)", "intervalFactor": 3, "legendFormat": "{{level}}", "refId": "A" @@ -866,7 +857,6 @@ "y": 6 }, "id": 29, - "interval": "1m", "maxDataPoints": "", "options": { "showLabels": false, diff --git a/charts/meta-monitoring/src/dashboards/loki/loki-mixin-recording-rules.json b/charts/meta-monitoring/src/dashboards/loki/loki-mixin-recording-rules.json new file mode 100644 index 0000000..f1f6c21 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/loki/loki-mixin-recording-rules.json @@ -0,0 +1,724 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "iteration": 1635347545534, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 2, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + 
"calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.3.0-38205pre", + "targets": [ + { + "datasource": "${datasource}", + "exemplar": false, + "expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Appenders Not Ready", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 11, + "x": 2, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Samples Appended to WAL per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Series are unique combinations of labels", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 11, + "x": 13, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Series Created per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Write Behind", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": 
"sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Samples Sent per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "WAL Disk Size", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Pending Samples", + "type": "timeseries" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + 
"includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "tenant", + "options": [ ], + "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))", + "refresh": 0, + "regex": "/\"([^\"]+)\"/", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Recording Rules", + "uid": "recording-rules", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki/loki-operational.json similarity index 71% rename from charts/meta-monitoring/src/dashboards/loki-operational.json rename to charts/meta-monitoring/src/dashboards/loki/loki-operational.json index e0490b1..7d28595 100644 --- a/charts/meta-monitoring/src/dashboards/loki-operational.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-operational.json @@ -32,7 +32,6 @@ "y": 0 }, "id": 17, - 
"interval": "1m", "panels": [ ], "targets": [ ], "title": "Main", @@ -62,7 +61,6 @@ }, "hiddenSeries": false, "id": 6, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -89,7 +87,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", "legendFormat": "{{status}}", "refId": "A" } @@ -159,7 +157,6 @@ }, "hiddenSeries": false, "id": 7, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -186,7 +183,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", 
route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", "legendFormat": "{{status}}", "refId": "A" } @@ -232,6 +229,103 @@ "alignLevel": null } }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { }, + "unit": "ops" + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, sum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval])) - \nsum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval] offset 1h)))", + "legendFormat": "{{name}}-{{level}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Bad Words", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": { }, "bars": false, @@ -254,7 +348,7 @@ }, "hiddenSeries": false, "id": 2, - "interval": "1m", + "interval": "", "legend": { "avg": false, "current": false, @@ -350,7 +444,6 @@ }, "hiddenSeries": false, "id": 4, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -447,7 +540,6 @@ }, "hiddenSeries": false, "id": 24, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -531,7 +623,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -545,7 +637,6 @@ }, "hiddenSeries": false, "id": 9, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -574,17 +665,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } @@ -639,7 +730,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -653,7 +744,6 @@ }, "hiddenSeries": false, "id": 12, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -682,17 +772,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le) 
(cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } @@ -760,7 +850,6 @@ }, "hiddenSeries": false, "id": 71, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -789,7 +878,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" @@ -846,7 +935,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -860,7 +949,6 @@ }, "hiddenSeries": false, "id": 13, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -889,18 +977,18 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", 
cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", "hide": false, "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", "hide": false, "legendFormat": ".5", "refId": "C" @@ -969,7 +1057,6 @@ }, "hiddenSeries": false, "id": 72, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -998,7 +1085,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", 
status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" @@ -1055,7 +1142,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -1069,7 +1156,6 @@ }, "hiddenSeries": false, "id": 10, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -1100,17 +1186,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.5", "refId": "C" } @@ -1165,7 +1251,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -1179,7 +1265,6 @@ }, "hiddenSeries": false, "id": 14, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -1208,17 +1293,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".99-{{route}}", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".9-{{route}}", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".5-{{route}}", "refId": "C" } @@ -1286,7 +1371,6 @@ }, "hiddenSeries": false, "id": 73, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -1315,7 +1399,7 @@ "steppedLine": false, "targets": [ { - 
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" @@ -1373,7 +1457,7 @@ "fieldConfig": { "defaults": { "custom": { }, - "unit": "s" + "unit": "ms" }, "overrides": [ ] }, @@ -1387,7 +1471,6 @@ }, "hiddenSeries": false, "id": 15, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -1416,17 +1499,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".99-{{route}}", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le,route) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".9-{{route}}", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".5-{{route}}", "refId": "C" } @@ -1494,7 +1577,6 @@ }, "hiddenSeries": false, "id": 74, - "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -1523,7 +1605,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", status_code!~\"5[0-9]{2}\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" @@ -1598,7 +1680,6 @@ }, "hiddenSeries": false, "id": 112, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -1673,9 +1754,93 @@ } }, { - "columns": [ ], "datasource": "$datasource", - "fontSize": "100%", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "right", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", 
+ "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tenant" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "reason" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, @@ -1683,72 +1848,20 @@ "y": 27 }, "id": 113, - "interval": "1m", - "pageSize": null, - "panels": [ ], - "showHeader": true, - "sort": { - "col": 3, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "tenant", - "thresholds": [ ], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" + "show": false }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "number", - "unit": "short" - } - ], + "showHeader": true + }, + "panels": [ ], + "pluginVersion": "10.4.0", "targets": [ { "expr": "topk(10, sum by (tenant, 
reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))", @@ -1759,11 +1872,16 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Discarded Lines Per Interval", - "transform": "table", - "type": "table-old" + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [ ] + } + } + ], + "type": "table" } ], "targets": [ ], @@ -1797,7 +1915,6 @@ }, "hiddenSeries": false, "id": 26, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -1824,7 +1941,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write.*|$namespace-[0-9]+)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"distributor.*\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -1892,7 +2009,6 @@ }, "hiddenSeries": false, "id": 27, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -1921,7 +2037,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|$namespace/(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"distributor.*\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -1985,7 +2101,6 @@ }, "hiddenSeries": false, "id": 31, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2017,7 +2132,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | 
level=\"error\"[$__interval]))", + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[$__auto]))", "refId": "A" } ], @@ -2071,7 +2186,6 @@ "y": 32 }, "id": 29, - "interval": "1m", "options": { "showLabels": false, "showTime": false, @@ -2081,7 +2195,7 @@ "panels": [ ], "targets": [ { - "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"} |= \"level=error\"", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} |= \"level=error\"", "refId": "A" } ], @@ -2106,7 +2220,6 @@ }, "hiddenSeries": false, "id": 33, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2133,7 +2246,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*distributor.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", "interval": "", "intervalFactor": 1, "legendFormat": "{{route}}", @@ -2202,7 +2315,6 @@ }, "hiddenSeries": false, "id": 32, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2297,7 +2409,6 @@ }, "hiddenSeries": false, "id": 34, - "interval": "1m", "legend": { "avg": false, "current": false, 
@@ -2392,7 +2503,6 @@ }, "hiddenSeries": false, "id": 35, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2498,7 +2608,6 @@ }, "hiddenSeries": false, "id": 36, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2525,7 +2634,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"ingester.*\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -2593,7 +2702,6 @@ }, "hiddenSeries": false, "id": 37, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2622,7 +2730,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"ingester.*\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -2686,7 +2794,6 @@ }, "hiddenSeries": false, "id": 38, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2718,7 +2825,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))", + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[$__auto]))", "refId": "A" } ], @@ -2772,7 +2879,6 @@ "y": 32 }, "id": 39, - "interval": "1m", "options": { "showLabels": false, "showTime": false, @@ -2782,7 +2888,7 @@ "panels": [ ], "targets": [ { - 
"expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"} |= \"level=error\"", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} |= \"level=error\"", "refId": "A" } ], @@ -2807,7 +2913,6 @@ }, "hiddenSeries": false, "id": 67, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2834,7 +2939,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (route) > 0", "interval": "", "intervalFactor": 1, "legendFormat": "{{route}}", @@ -2914,7 +3019,6 @@ }, "hiddenSeries": false, "id": 106, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -2943,7 +3047,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}))", + "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}))", "interval": "", "legendFormat": "{{ tenant }}", "refId": "A" @@ -3006,7 +3110,6 @@ }, "hiddenSeries": false, "id": 108, - "interval": 
"1m", "legend": { "avg": false, "current": false, @@ -3035,7 +3138,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]) > 0))", + "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]) > 0))", "interval": "", "legendFormat": "{{ tenant }}", "refId": "A" @@ -3114,7 +3217,6 @@ }, "hiddenSeries": false, "id": 102, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3146,13 +3248,13 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]))", "interval": "", "legendFormat": "Chunks", "refId": "A" }, { - "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) < 1", + "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) < 1", "interval": "", "legendFormat": "De-Dupe Ratio", "refId": "B" @@ -3223,7 +3325,6 @@ "hideZeroBuckets": false, "highlightCards": true, "id": 100, - "interval": "1m", "legend": { "show": true }, @@ -3231,7 +3332,7 @@ "reverseYBuckets": false, "targets": [ { - 
"expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -3281,7 +3382,6 @@ }, "hiddenSeries": false, "id": 96, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3308,7 +3408,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", "interval": "", "legendFormat": "{{ reason }}" } @@ -3380,7 +3480,6 @@ "hideZeroBuckets": true, "highlightCards": true, "id": 98, - "interval": "1m", "legend": { "show": true }, @@ -3388,7 +3487,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))", "format": "heatmap", "instant": false, "interval": 
"", @@ -3454,7 +3553,6 @@ }, "hiddenSeries": false, "id": 68, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3481,7 +3579,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"querier.*\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -3549,7 +3647,6 @@ }, "hiddenSeries": false, "id": 69, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3578,7 +3675,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|.*loki-single-binary|$namespace-[0-9]+)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"querier.*\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -3642,7 +3739,6 @@ }, "hiddenSeries": false, "id": 65, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3674,7 +3770,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"} | logfmt | level=\"error\"[$__interval]))", + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[$__auto]))", "refId": "A" } ], @@ -3737,7 +3833,7 @@ "panels": [ ], "targets": [ { - "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(querier|(loki|enterprise-logs)-read|loki-single-binary)\"} |= \"level=error\"", + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", 
job=~\"($namespace)/querier\"} |= \"level=error\"", "refId": "A" } ], @@ -3762,7 +3858,6 @@ }, "hiddenSeries": false, "id": 70, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -3789,7 +3884,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval])) by (route) > 0", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", "interval": "", "intervalFactor": 1, "legendFormat": "{{route}}", @@ -3874,7 +3969,7 @@ }, "hiddenSeries": false, "id": 53, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -3978,7 +4073,7 @@ }, "hiddenSeries": false, "id": 54, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -4091,7 +4186,7 @@ }, "hiddenSeries": false, "id": 55, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -4200,7 +4295,7 @@ }, "hiddenSeries": false, "id": 58, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -4288,9 +4383,9 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 34 }, - "id": 60, + "id": 43, "panels": [ { "aliasColors": { }, @@ -4306,22 +4401,20 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, + "h": 7, + "w": 6, "x": 0, - "y": 33 + "y": 9 }, "hiddenSeries": 
false, - "id": 61, - "interval": "1m", + "id": 41, + "interval": "", "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -4342,21 +4435,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", + "legendFormat": ".9", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", "refId": "C" } ], @@ -4364,7 +4453,7 @@ "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Latency By Operation", + "title": "MutateRows Latency", "tooltip": { "shared": true, "sort": 2, @@ -4407,25 +4496,28 @@ 
"dashLength": 10, "dashes": false, "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 41 + "h": 7, + "w": 6, + "x": 6, + "y": 9 }, "hiddenSeries": false, - "id": 62, - "interval": "1m", + "id": 46, + "interval": "", "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -4446,17 +4538,30 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", + "legendFormat": "99%", "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "50%", + "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Status By Method", + "title": "ReadRows Latency", "tooltip": { "shared": true, "sort": 2, @@ -4492,23 +4597,7 @@ "align": false, "alignLevel": null } - } - ], - "targets": [ ], - "title": "GCS", - "type": "row" 
- }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 78, - "panels": [ + }, { "aliasColors": { }, "bars": false, @@ -4523,21 +4612,20 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 + "h": 7, + "w": 6, + "x": 12, + "y": 9 }, - "id": 79, - "interval": "1m", + "hiddenSeries": false, + "id": 44, + "interval": "", "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -4558,21 +4646,22 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", + "legendFormat": "99%", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "90%", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", + "expr": 
"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "50%", "refId": "C" } ], @@ -4580,7 +4669,7 @@ "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Latency By Operation", + "title": "GetTable Latency", "tooltip": { "shared": true, "sort": 2, @@ -4623,24 +4712,28 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 18 + "h": 7, + "w": 6, + "x": 18, + "y": 9 }, - "id": 80, - "interval": "1m", + "hiddenSeries": false, + "id": 45, + "interval": "", "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -4661,17 +4754,25 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", + "legendFormat": ".9", "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, 
sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", + "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Status By Method", + "title": "ListTables Latency", "tooltip": { "shared": true, "sort": 2, @@ -4707,14 +4808,1482 @@ "align": false, "alignLevel": null } - } - ], - "targets": [ ], - "title": "S3", - "type": "row" - }, - { - "collapsed": true, + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 47, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "MutateRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 16 + }, + "hiddenSeries": false, + "id": 50, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ReadRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 48, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "GetTable Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 16 + }, + "hiddenSeries": false, + "id": 49, + "interval": "", + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ListTables Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Big Table", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 60, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 61, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 62, + "interval": "", 
+ "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "GCS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 76, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 82, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + 
"dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Failure Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Consumed Capacity Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 9 + }, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Throttled Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + 
"w": 6, + "x": 18, + "y": 9 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Dropped Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 15 + }, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, 
sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Query Pages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 15 + }, + "id": 87, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, 
sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 15 + }, + "id": 88, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Dynamo", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 78, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 79, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 80, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "S3", + "type": "row" + }, + { + "collapsed": true, "datasource": null, "gridPos": { "h": 1, @@ -4744,7 +6313,7 @@ "y": 10 }, "id": 79, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -4847,7 +6416,7 @@ "y": 18 }, "id": 80, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -4959,7 +6528,7 @@ "y": 10 }, "id": 115, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, @@ -5062,7 +6631,7 @@ "y": 18 }, "id": 116, - "interval": "1m", + "interval": "", "legend": { "alignAsTable": true, "avg": false, diff --git a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json b/charts/meta-monitoring/src/dashboards/loki/loki-reads-resources.json similarity index 90% rename from charts/meta-monitoring/src/dashboards/loki-reads-resources.json rename to charts/meta-monitoring/src/dashboards/loki/loki-reads-resources.json index b6c186b..a718820 100644 --- 
a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-reads-resources.json @@ -90,7 +90,6 @@ ] }, "id": 1, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -104,19 +103,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -192,7 +191,6 @@ ] }, "id": 2, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -206,19 +204,19 @@ "span": 4, "targets": [ { 
- "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -255,7 +253,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -269,7 +266,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|loki-read|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -357,7 +354,6 @@ ] }, "id": 4, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -371,19 +367,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -459,7 +455,6 @@ ] }, "id": 5, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -473,19 +468,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -522,7 +517,6 @@ "overrides": [ ] }, "id": 6, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -536,7 +530,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-read|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -625,7 +619,6 @@ }, "gridPos": { }, "id": 7, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -638,19 +631,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"querier\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -727,7 +720,6 @@ }, "gridPos": { }, "id": 8, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -740,19 +732,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", + "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -790,7 +782,6 @@ }, "gridPos": { }, "id": 9, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -803,7 +794,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-read|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -841,7 +832,6 @@ }, "gridPos": { }, "id": 10, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -854,7 +844,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", 
\"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -889,7 +879,6 @@ }, "gridPos": { }, "id": 11, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -902,7 +891,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -937,7 +926,6 @@ }, "gridPos": { }, "id": 12, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -950,7 +938,7 @@ }, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*querier.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*querier.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", 
"legendLink": null @@ -1037,7 +1025,6 @@ }, "gridPos": { }, "id": 13, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1050,19 +1037,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1139,7 +1126,6 @@ }, "gridPos": { }, "id": 14, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1152,19 +1138,19 @@ }, "targets": [ { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1202,7 +1188,6 @@ }, "gridPos": { }, "id": 15, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1215,7 +1200,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1253,7 +1238,6 @@ }, "gridPos": { }, "id": 16, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1266,7 +1250,7 @@ }, "targets": [ { - "expr": 
"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1301,7 +1285,6 @@ }, "gridPos": { }, "id": 17, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1314,7 +1297,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|index-gateway\", pod=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1349,7 +1332,6 @@ }, "gridPos": { }, "id": 18, - "interval": 
"1m", "links": [ ], "options": { "legend": { @@ -1362,7 +1344,7 @@ }, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(.*index-gateway.*|(loki|enterprise-logs)-read.*|loki-single-binary).*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*index-gateway.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*index-gateway.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -1449,7 +1431,6 @@ }, "gridPos": { }, "id": 19, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1462,19 +1443,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1551,7 +1532,6 @@ }, "gridPos": { }, "id": 20, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1564,19 +1544,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} 
> 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1614,7 +1594,6 @@ }, "gridPos": { }, "id": 21, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1627,7 +1606,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|loki-read|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1665,7 +1644,6 @@ }, "gridPos": { }, "id": 22, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1678,7 +1656,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1713,7 +1691,6 @@ }, "gridPos": { }, "id": 23, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1726,7 +1703,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1761,7 +1738,6 @@ }, "gridPos": { }, "id": 24, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1774,7 +1750,7 @@ }, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-gateway.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*bloom-gateway.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*bloom-gateway.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -1860,7 +1836,6 @@ ] }, "id": 25, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1874,19 +1849,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1962,7 +1937,6 @@ ] }, "id": 26, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1976,19 +1950,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -2025,7 +1999,6 @@ "overrides": [ ] }, "id": 27, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -2039,7 +2012,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -2089,7 +2062,6 @@ }, "gridPos": { }, "id": 28, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -2176,7 +2148,6 @@ }, "gridPos": { }, "id": 29, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -2189,19 +2160,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", 
"legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -2278,7 +2249,6 @@ }, "gridPos": { }, "id": 30, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -2291,19 +2261,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", "format": "time_series", 
"legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -2341,7 +2311,6 @@ }, "gridPos": { }, "id": 31, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -2354,7 +2323,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ruler|loki-backend|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null diff --git a/charts/meta-monitoring/src/dashboards/loki-reads.json b/charts/meta-monitoring/src/dashboards/loki/loki-reads.json similarity index 77% rename from charts/meta-monitoring/src/dashboards/loki-reads.json rename to charts/meta-monitoring/src/dashboards/loki/loki-reads.json index 72e2d84..2d63218 100644 --- a/charts/meta-monitoring/src/dashboards/loki-reads.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-reads.json @@ -200,7 +200,6 @@ }, "fill": 10, "id": 1, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -216,7 +215,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -250,7 +249,6 @@ "overrides": [ ] }, "id": 2, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -265,28 +263,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -335,7 +327,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -350,7 +341,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -548,7 +539,6 @@ }, "fill": 10, "id": 4, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -564,7 +554,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", 
\"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -598,7 +588,6 @@ "overrides": [ ] }, "id": 5, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -613,28 +602,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -683,7 +666,6 @@ "overrides": [ ] }, "id": 6, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -698,7 +680,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", 
route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -896,7 +878,6 @@ }, "fill": 10, "id": 7, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -912,7 +893,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -946,7 +927,6 @@ "overrides": [ ] }, "id": 8, - "interval": "1m", "links": [ ], 
"nullPointMode": "null as zero", "options": { @@ -961,28 +941,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 99th 
Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ 
route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", + "expr": "1e3 * 
sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -1031,7 +1005,6 @@ "overrides": [ ] }, "id": 9, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1046,7 +1019,7 @@ "span": 4, "targets": [ 
{ - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1244,7 +1217,6 @@ }, "fill": 10, "id": 10, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -1260,7 +1232,7 
@@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", 
\"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1294,7 +1266,6 @@ "overrides": [ ] }, "id": 11, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1309,28 +1280,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -1379,7 +1344,6 @@ "overrides": [ ] }, "id": 12, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1394,7 +1358,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone-.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": 
"histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1592,7 +1556,6 @@ }, "fill": 10, "id": 13, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -1608,7 +1571,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1642,7 +1605,6 @@ "overrides": [ ] }, "id": 14, - "interval": "1m", "links": [ ], 
"nullPointMode": "null as zero", "options": { @@ -1657,28 +1619,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route 
}} 99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{ route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", + "expr": "1e3 * 
sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -1727,7 +1683,6 @@ "overrides": [ ] }, "id": 15, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1742,7 +1697,7 @@ "span": 4, 
"targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*index-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1940,7 +1895,6 @@ }, "fill": 10, "id": 16, - "interval": "1m", "linewidth": 0, "links": [ ], 
"options": { @@ -1956,7 +1910,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n 
\"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1990,7 +1944,6 @@ "overrides": [ ] }, "id": 17, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -2005,28 +1958,22 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ route }} 50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/(bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", 
route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -2075,7 +2022,6 @@ "overrides": [ ] }, "id": 18, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -2090,7 +2036,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|(loki|enterprise-logs)-backend|loki-single-binary)\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": 
"histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -2288,7 +2234,6 @@ }, "fill": 10, "id": 19, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -2304,7 +2249,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -2338,7 +2283,6 @@ "overrides": [ ] }, "id": 20, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", 
"options": { @@ -2353,19 +2297,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval]))", + "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2417,7 +2361,6 @@ "overrides": [ ] }, 
"id": 21, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -2432,7 +2375,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|(loki|enterprise-logs)-read|loki-single-binary)\", operation!=\"index_chunk\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -2449,7 +2392,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "TSDB Index", + "title": "TSBD Index", "titleSize": "h6" }, { @@ -2630,7 +2573,6 @@ }, "fill": 10, "id": 22, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -2646,7 +2588,7 @@ "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -2680,7 +2622,6 @@ "overrides": [ ] }, "id": 23, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -2695,19 +2636,19 @@ "span": 4, 
"targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / 
sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2759,7 +2700,6 @@ "overrides": [ ] }, "id": 24, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -2774,7 +2714,7 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*querier|.*index-gateway|(loki|enterprise-logs)-read|loki-single-binary)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le,pod)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le,pod)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -2897,4 +2837,4 @@ "title": "Loki / Reads", "uid": "reads", "version": 0 - } + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/loki-retention.json b/charts/meta-monitoring/src/dashboards/loki/loki-retention.json similarity index 96% rename from charts/meta-monitoring/src/dashboards/loki-retention.json rename to charts/meta-monitoring/src/dashboards/loki/loki-retention.json index 2b94bc7..70c5171 100644 --- a/charts/meta-monitoring/src/dashboards/loki-retention.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-retention.json @@ -90,7 +90,6 @@ ] }, "id": 1, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -104,19 +103,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -192,7 +191,6 @@ ] }, "id": 2, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -206,19 +204,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\", 
resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*compactor.*|(loki|enterprise-logs)-backend.*|loki-single-binary)\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -255,7 +253,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -269,7 +266,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -320,7 +317,6 @@ }, "fill": 1, "id": 4, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -427,7 +423,6 @@ "overrides": [ ] }, "id": 5, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -487,7 +482,6 @@ "overrides": [ ] }, "id": 6, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -535,7 +529,6 @@ "overrides": [ ] }, "id": 7, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -597,7 +590,6 @@ }, "fill": 1, "id": 8, - "interval": "1m", "legend": { "avg": false, "current": false, @@ -704,7 +696,6 @@ "overrides": [ ] }, "id": 9, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -752,7 +743,6 @@ "overrides": [ ] }, "id": 10, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -812,7 +802,6 @@ "overrides": [ ] }, "id": 11, - "interval": "1m", "links": [ ], "options": { 
"legend": { @@ -860,7 +849,6 @@ "overrides": [ ] }, "id": 12, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -908,7 +896,6 @@ "overrides": [ ] }, "id": 13, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -969,7 +956,6 @@ }, "format": "short", "id": 14, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1018,7 +1004,6 @@ "overrides": [ ] }, "id": 15, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1110,7 +1095,6 @@ }, "format": "short", "id": 16, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1159,7 +1143,6 @@ "overrides": [ ] }, "id": 17, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1250,7 +1233,6 @@ "overrides": [ ] }, "id": 18, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1298,7 +1280,6 @@ "overrides": [ ] }, "id": 19, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1346,7 +1327,6 @@ "overrides": [ ] }, "id": 20, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -1387,7 +1367,7 @@ "span": 12, "targets": [ { - "expr": "{cluster=~\"$cluster\", job=~\"($namespace)/(.*compactor|(loki|enterprise-logs)-backend.*|loki-single-binary)\"}", + "expr": "{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}", "refId": "A" } ], diff --git a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json b/charts/meta-monitoring/src/dashboards/loki/loki-writes-resources.json similarity index 91% rename from charts/meta-monitoring/src/dashboards/loki-writes-resources.json rename to charts/meta-monitoring/src/dashboards/loki/loki-writes-resources.json index 11c15cb..33218bb 100644 --- a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-writes-resources.json @@ -90,7 +90,6 @@ ] }, "id": 1, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -104,7 +103,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -116,7 +115,7 @@ "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -192,7 +191,6 @@ ] }, "id": 2, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -206,7 +204,7 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -218,7 +216,7 @@ "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=~\"distributor\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -255,7 +253,6 @@ "overrides": [ ] }, "id": 3, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -269,7 +266,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|loki-write|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -319,7 +316,6 @@ }, "gridPos": { }, "id": 4, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -332,7 +328,7 @@ }, "targets": [ { - "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})", + "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -409,7 +405,6 @@ }, "gridPos": { }, "id": 5, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -422,19 +417,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"cpu\"} > 0)", + "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -511,7 +506,6 @@ }, "gridPos": { }, "id": 6, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -524,19 +518,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -574,7 +568,6 @@ }, "gridPos": { }, "id": 7, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -587,7 +580,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -625,7 +618,6 @@ }, "gridPos": { }, "id": 8, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -638,7 +630,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -673,7 +665,6 @@ }, "gridPos": { }, "id": 9, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -686,7 +677,7 @@ }, 
"targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"loki|ingester\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary)\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -721,7 +712,6 @@ }, "gridPos": { }, "id": 10, - "interval": "1m", "links": [ ], "options": { "legend": { @@ -734,7 +724,7 @@ }, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary).*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*ingester.*.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*ingester.*.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null diff --git a/charts/meta-monitoring/src/dashboards/loki-writes.json b/charts/meta-monitoring/src/dashboards/loki/loki-writes.json similarity index 80% 
rename from charts/meta-monitoring/src/dashboards/loki-writes.json rename to charts/meta-monitoring/src/dashboards/loki/loki-writes.json index cc6a1af..6a967cd 100644 --- a/charts/meta-monitoring/src/dashboards/loki-writes.json +++ b/charts/meta-monitoring/src/dashboards/loki/loki-writes.json @@ -200,7 +200,6 @@ }, "fill": 10, "id": 1, - "interval": "1m", "linewidth": 0, "links": [ ], "options": { @@ -212,11 +211,11 @@ "sort": "none" } }, - "span": 6, + "span": 4, "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -250,7 +249,6 @@ "overrides": [ ] }, "id": 2, - "interval": "1m", "links": [ ], "nullPointMode": "null as zero", "options": { @@ -262,31 +260,25 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", 
route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -309,6 +301,57 @@ "show": false } ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs|/httpgrpc.HTTP/Handle\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" } ], "repeat": null, @@ -346,8 +389,7 @@ }, "overrides": [ ] }, - "id": 3, - "interval": "1m", + "id": 4, "links": [ ], "options": { "legend": { @@ -361,7 +403,7 @@ "span": 6, "targets": [ { - "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval]))", + "expr": "sum 
(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", "format": "time_series", "legendFormat": "bytes", "legendLink": null @@ -394,8 +436,7 @@ }, "overrides": [ ] }, - "id": 4, - "interval": "1m", + "id": 5, "links": [ ], "options": { "legend": { @@ -410,7 +451,7 @@ "stack": true, "targets": [ { - "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*distributor|(loki|enterprise-logs)-write|loki-single-binary)\",}[$__rate_interval]))", + "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{tenant}}", "legendLink": null @@ -622,8 +663,7 @@ ] }, "fill": 10, - "id": 5, - "interval": "1m", + "id": 6, "linewidth": 0, "links": [ ], "options": { @@ -635,11 +675,11 @@ "sort": "none" } }, - "span": 6, + "span": 4, "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + 
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -672,8 +712,7 @@ }, "overrides": [ ] }, - "id": 6, - "interval": "1m", + "id": 7, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -685,31 +724,25 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "50th percentile", + "refId": "B" }, { - "expr": "1e3 * 
sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester-zone.*|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -732,6 +765,57 @@ "show": false } ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" } ], "repeat": null, @@ -918,8 +1002,7 
@@ ] }, "fill": 10, - "id": 7, - "interval": "1m", + "id": 9, "linewidth": 0, "links": [ ], "options": { @@ -931,11 +1014,11 @@ "sort": "none" } }, - "span": 6, + "span": 4, "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -968,8 +1051,7 @@ }, "overrides": [ ] }, - "id": 8, - "interval": "1m", + "id": 10, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -981,31 +1063,25 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A", - "step": 10 + "legendFormat": "99th percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B", - "step": 10 + "legendFormat": "50th percentile", + "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", route=\"/logproto.Pusher/Push\"})", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", - "refId": "C", - "step": 10 + "refId": "C" } ], "title": "Latency", @@ -1028,6 +1104,57 @@ "show": false } ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" } ], "repeat": null, @@ -1214,8 +1341,7 @@ ] }, "fill": 10, - "id": 9, - "interval": "1m", + "id": 12, "linewidth": 0, "links": [ ], "options": { @@ -1227,11 +1353,11 @@ "sort": "none" } }, - "span": 6, + "span": 4, "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1264,8 +1390,7 @@ }, "overrides": [ ] }, - "id": 10, - "interval": "1m", + "id": 13, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1277,22 +1402,22 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": 
"histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"index_chunk\"}[$__rate_interval]))", + "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1318,6 +1443,57 @@ "show": false } ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", 
+ "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" } ], "repeat": null, @@ -1504,8 +1680,7 @@ ] }, "fill": 10, - "id": 11, - "interval": "1m", + "id": 15, "linewidth": 0, "links": [ ], "options": { @@ -1517,11 +1692,11 @@ "sort": "none" } }, - "span": 6, + "span": 4, "stack": true, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1554,8 +1729,7 @@ }, "overrides": [ ] }, - "id": 12, - "interval": "1m", + "id": 16, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1567,22 +1741,22 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\", operation=\"WRITE\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1608,13 +1782,64 @@ "show": false } ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + 
"defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "title": "Per Pod Latency (p99)", + "type": "timeseries" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "BoltDB Shipper", + "title": "BoltDB Index", "titleSize": "h6" } ], diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager-resources.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager-resources.json new file mode 100644 index 0000000..5813e25 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager-resources.json @@ -0,0 +1,697 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, 
+ { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + 
"legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 
12, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(alertmanager).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(alertmanager).*\"}\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + 
"5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager resources", + "uid": "a6883fb22799ac74479c7db872451092", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager.json new file mode 100644 index 0000000..6a84e19 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-alertmanager.json @@ -0,0 +1,2497 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + 
"thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total silences", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } 
+ } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3 < ($latency_metrics * 
-Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}))\n < 
($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": 
null + } + ], + "title": "APS", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts received", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "per pod Active Aggregation Groups", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts grouping", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alert notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": 
"histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th 
Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": 
"time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) 
(cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod tenants", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], 
+ "title": "Per pod silences", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Replication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Syncs/sec", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Syncs/sec (by reason)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "errors", + "legendLink": null + } + ], + "title": "Ring check errors/sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant configuration sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + 
"panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{outcome}}", + "legendLink": null + } + ], + "title": "Initial syncs /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Initial sync duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + 
} + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Fetch state from other alertmanagers /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding initial state sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Replicate state to other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Merge state from other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager.*|.*cortex|.*mimir|.*mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Persist state to remote storage /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding runtime state sync", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, 
+ "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager", + "uid": "b0d38d318bbddd80476246d4930f9e55", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor-resources.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor-resources.json new file mode 100644 index 0000000..c98c5a0 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor-resources.json @@ -0,0 +1,810 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, 
+ "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU and memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": 
"multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": 
"timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(compactor).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", persistentvolumeclaim=~\".*(compactor).*\"}\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Compactor resources", + 
"uid": "09a5c49e9cdb2f2b24c6d184574a07fd", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor.json new file mode 100644 index 0000000..d5804e7 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-compactor.json @@ -0,0 +1,2161 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "completed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "started" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#34CCEB", + "mode": "fixed" + } + } + ] + } + ] 
+ }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "started", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "completed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Per-instance runs / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", + "fieldConfig": { + "defaults": { + "max": 1, + "noValue": 1, + "unit": "percentunit" + } + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Tenants compaction progress", + "type": "timeseries" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "No compactor data", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Last run" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "color": "text", + "text": "No successful runs since startup yet" + }, + "to": 0 + }, + "type": "range" + } + ] + }, + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { 
+ "color": "yellow", + "value": 7200 + }, + { + "color": "orange", + "value": 21600 + }, + { + "color": "red", + "value": 43200 + } + ] + } + } + ] + } + ] + }, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "reduceOptions": { + "calcs": [ + "first" + ], + "fields": "/^Last run$/", + "values": false + }, + "textMode": "value" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Longest time since last successful run", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transformations": [ + { + "id": "organize", + "options": { + "renameByName": { + "Value": "Last run", + "pod": "Compactor" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "desc": true, + "field": "Last run" + } + ] + } + } + ], + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "color": "transparent", + "text": "N/A" + }, + "to": 0 + }, + "type": "range" + }, + { + "options": { + "from": 0, + "result": { + "color": "green", + "text": "Ok" + }, + "to": 7200 + }, + "type": "range" + }, + { + "options": { + "from": 7200, + "result": { + "color": "yellow", + "text": "Delayed" + }, + "to": 21600 + }, + "type": "range" + }, + { + "options": { + "from": 21600, + "result": { + "color": "orange", + "text": "Late" + }, + "to": 43200 + }, + "type": "range" + }, + { + "options": { + "from": 43200, + "result": { + "color": "red", + "text": "Very late" + }, + "to": "Infinity" + }, + "type": "range" + }, + { + "options": { + "match": "null+nan", + "result": { + "color": "transparent", + "text": "Unknown" + } + }, + "type": "special" + } + ] + }, + { + "id": "custom.width", + "value": 86 + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Last run" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.width", + "value": 74 + }, + { + "id": 
"mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "text": "Never" + }, + "to": 0 + }, + "type": "range" + } + ] + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", + "format": "table", + "instant": true, + "legendFormat": "Last run", + "legendLink": null + } + ], + "title": "Last successful run per-compactor replica", + "transformations": [ + { + "id": "organize", + "options": { + "renameByName": { + "Value": "Last run", + "pod": "Compactor" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "desc": true, + "field": "Last run" + } + ] + } + }, + { + "id": "calculateField", + "options": { + "alias": "One", + "binary": { + "left": "Last run", + "operator": "/", + "right": "Last run" + }, + "mode": "binary", + "replaceFields": false + } + }, + { + "id": "calculateField", + "options": { + "alias": "Status", + "binary": { + "left": "Last run", + "operator": "*", + "right": "One" + }, + "mode": "binary", + "replaceFields": false + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Compactor", + "Last run", + "Status" + ] + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number 
of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", + "format": "time_series", + 
"legendFormat": "Jobs", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "compactions", + "legendLink": null + } + ], + "title": "TSDB compactions / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, 
sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "TSDB compaction duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + } + ], + "title": "Average blocks / tenant", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Tenants with largest number of blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], 
+ "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Blocks marked for deletion / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks deletions / sec", + "type": 
"timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Garbage collector", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Metadata syncs / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Metadata sync duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Metadata sync", + 
"titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Object Store", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": 
null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "format": 
"time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Key-value store for compactors ring", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + 
"options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Compactor", + "uid": "1b3443aea86db629e6efdb7d05c53823", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-config.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-config.json new file mode 100644 index 0000000..f45b396 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-config.json @@ -0,0 +1,258 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "instances" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Startup config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Startup config file", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "instances" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Runtime config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Runtime config file", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, 
+ "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Config", + "uid": "5d9d0b4724c0f80d68467088ec61e003", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-object-store.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-object-store.json new file mode 100644 index 0000000..4de456c --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-object-store.json @@ -0,0 +1,822 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": 
"dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "RPS / component", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "Error rate / component", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Components", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "RPS / operation", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate / operation", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Operations", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + 
"steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + 
"nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + 
}, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "nullPointMode": "null 
as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + 
} + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, 
+ "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Object Store", + "uid": "e1324ee2a434f4158c00a9ee279d3292", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-overrides.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-overrides.json new file mode 100644 index 0000000..708e1b2 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-overrides.json @@ -0,0 +1,266 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + 
{ + "datasource": "${datasource}", + "id": 1, + "span": 12, + "targets": [ + { + "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Defaults", + "transformations": [ + { + "id": "labelsToFields", + "options": { } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Value": 1, + "limit_name": 0 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { }, + "sort": [ + { + "field": "limit_name" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${datasource}", + "id": 2, + "span": 12, + "targets": [ + { + "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Per-tenant overrides", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "limit_name" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "user": 0 + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + 
"current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overrides", + "uid": "1e2c358600ac53f09faea133f811b5bb", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-networking.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-networking.json new file mode 100644 index 0000000..d2c1307 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-networking.json @@ -0,0 +1,823 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + 
"annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + 
"showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + 
], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": 
"{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + 
"legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": 
"", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overview networking", + "uid": "e15c71d372cc541367a088f10d9fcd92", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-resources.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-resources.json new file mode 100644 index 0000000..b757eb1 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview-resources.json @@ -0,0 +1,922 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 
1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 
3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(distributor|ingester|mimir-write).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(distributor|ingester|mimir-write).*\"}\n)\n", + "format": "time_series", + "legendFormat": 
"{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": 
"time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", 
+ "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", 
persistentvolumeclaim=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": 
"Mimir / Overview resources", + "uid": "a9b92d3c4d1af325d872a9e9a7083d71", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-overview.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview.json new file mode 100644 index 0000000..085edd1 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-overview.json @@ -0,0 +1,1708 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#7EB26D", + "value": null + }, + { + "color": "#EAB839", + "value": 0.01 + }, + { + "color": "#E24D42", + "value": 0.050000000000000003 + } + ] + } + } + }, + "id": 2, + "options": { + "showValue": "never" + }, + "span": 6, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval])))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))\n < ($latency_metrics * -Inf)", + "instant": false, + "legendFormat": "Writes", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n < ($latency_metrics * +Inf)", + "instant": false, + "legendFormat": "Writes", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*|error\"}[$__rate_interval])))\n or\n # Handle the case no failure has been tracked yet.\n 
vector(0)\n)\n/\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))\n < ($latency_metrics * -Inf)", + "instant": false, + "legendFormat": "Reads", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n < ($latency_metrics * +Inf)", + "instant": false, + "legendFormat": "Reads", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Rule evaluations", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # Failed notifications from 
ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", + "instant": false, + "legendFormat": "Alerting notifications", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Object storage", + "range": true + } + ], + "title": "Status", + "type": "state-timeline" + }, + { + "id": 3, + "options": { + "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", + "alertName": "Mimir", + "dashboardAlerts": false, + "maxItems": 100, + "sortOrder": 3, + "stateFilter": { + "error": true, + "firing": true, + "noData": 
false, + "normal": false, + "pending": false + } + }, + "span": 3, + "title": "Firing alerts", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Mimir cluster health", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the write path. \nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", + "datasource": null, + "description": "", + "id": 4, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + 
"id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Write requests / sec", + 
"type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum 
(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Write latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" 
+ } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "cps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "samples / sec", + "legendLink": null + }, + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "exemplars / sec", + "legendLink": null + } + ], + "title": "Ingestion / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the read path. 
\nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", + "datasource": null, + "description": "", + "id": 8, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Read requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as 
zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Read latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "instant queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#429D48", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_range($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "range queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#F1C731", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_labels($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label names\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": 
"#2A66CF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_label_name_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label values\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#9E44C1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "series queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#FFAB57", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_read($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "remote read queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C79424", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_metadata($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "metadata queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#84D586", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_exemplars($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "exemplar queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#A1C4FC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_active_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"active series\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C788DE", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_label_names($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label name cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#3F6833", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": 
"byRegexp", + "options": "/.*_api_v1_cardinality_label_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label value cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#447EBC", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (route) (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendLink": null + }, + { + "expr": "sum by (route) (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendLink": null + }, + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "other", + "legendLink": null + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "other", + "legendLink": null + } + ], + "title": "Queries / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", + "datasource": null, + "description": "", + "id": 12, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "success", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "missed", + "legendLink": null + } + ], + "title": "Rule evaluations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "average", + "legendLink": null + } + ], + "title": "Rule evaluations latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n 
-\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Alerting notifications sent to Alertmanager / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Recording and alerting rules", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", + "datasource": null, + "description": "", + "id": 16, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + 
}, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "seriesOverrides": [ + { + "alias": "attributes", + "color": "#429D48" + }, + { + "alias": "delete", + "color": "#F1C731" + }, + { + "alias": "exists", + "color": "#2A66CF" + }, + { + "alias": "get", + "color": "#9E44C1" + }, + { + "alias": "get_range", + "color": "#FFAB57" + }, + { + "alias": "iter", + "color": "#C79424" + }, + { + "alias": "upload", + "color": "#84D586" + } + ], + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Total number of blocks in the storage", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Long-term storage (object storage)", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + 
"includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overview", + "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-queries.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-queries.json new file mode 100644 index 0000000..3244caf --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-queries.json @@ -0,0 +1,2597 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" 
+ ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{$read_path_matcher}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_frontend_retries_sum{$read_path_matcher}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Retries", + "type": "timeseries", + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": 
[ + { + "expr": "sum by(pod) (cortex_query_frontend_queue_length{$read_path_matcher})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(user) (cortex_query_frontend_queue_length{$read_path_matcher}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by 
(le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{$read_path_matcher}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cortex_query_scheduler_queue_length{$read_path_matcher})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + 
"min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (cortex_query_scheduler_queue_length{$read_path_matcher}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed for a single input query.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, +
"lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{$read_path_matcher}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{$read_path_matcher}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", $read_path_matcher}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", $read_path_matcher}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", $read_path_matcher}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", $read_path_matcher}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", + "format": "time_series", + "legendFormat": "{{request_type}}", + "legendLink": null + } + ], + "title": "Query results cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{$read_path_matcher}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{$read_path_matcher}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Query results cache skipped", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend – query splitting and results cache", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Sharded queries ratio\nThe % of queries that have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. 
range queries).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{$read_path_matcher}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{$read_path_matcher}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "sharded queries ratio", + "legendLink": null + } + ], + "title": "Sharded queries ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{$read_path_matcher}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of sharded queries per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend – query sharding", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { 
+ "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Series per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + 
"links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Samples per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Exemplars per query", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": 
"histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{$read_path_matcher}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of store-gateways hit per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{$read_path_matcher}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Refetches of missing blocks per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failure Rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{$read_path_matcher}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Failure Rate", + "legendLink": null + } + ], + "title": "Consistency checks failed", + "type": 
"timeseries" + }, + { + "datasource": "$datasource", + "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{$read_path_matcher}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{$read_path_matcher, route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected queries", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": 
"max(cortex_bucket_index_loaded{$read_path_matcher})", + "format": "time_series", + "legendFormat": "Max", + "legendLink": null + }, + { + "expr": "min(cortex_bucket_index_loaded{$read_path_matcher})", + "format": "time_series", + "legendFormat": "Min", + "legendLink": null + }, + { + "expr": "avg(cortex_bucket_index_loaded{$read_path_matcher})", + "format": "time_series", + "legendFormat": "Average", + "legendLink": null + } + ], + "title": "Bucket indexes loaded (per querier)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_index_loads_total{$read_path_matcher}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_index_load_failures_total{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Bucket indexes load / sec", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{$read_path_matcher}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{$read_path_matcher}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{$read_path_matcher}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Bucket indexes load latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, 
+ "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Blocks queried / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data fetched / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data touched / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request average latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request 99th percentile latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", + "format": "time_series", + "legendFormat": "% of time reduced by preloading", + "legendLink": null + } + ], + "title": "Series batch preloading efficiency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Blocks currently owned", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks loaded / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks dropped / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Lazy loaded index-headers", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load gate latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Series hash cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + 
"expr": "sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "ExpandedPostings cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Chunks attributes in-memory cache hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": 
"default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "All", + "value": "cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*cortex|mimir|mimir-backend.*|mimir-read.*|querier.*|query-frontend.*|query-scheduler.*|ruler-querier.*|ruler-query-frontend.*|ruler-query-scheduler.*))\"" + }, + "hide": 0, + 
"includeAll": false, + "label": "Read path", + "multi": false, + "name": "read_path_matcher", + "options": [ + { + "selected": true, + "text": "All", + "value": "cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*cortex|mimir|mimir-backend.*|mimir-read.*|querier.*|query-frontend.*|query-scheduler.*|ruler-querier.*|ruler-query-frontend.*|ruler-query-scheduler.*))\"" + }, + { + "selected": false, + "text": "Main", + "value": "cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*cortex|mimir|mimir-backend.*|mimir-read.*|querier.*|query-frontend.*|query-scheduler.*))\"" + }, + { + "selected": false, + "text": "Remote ruler", + "value": "cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*ruler-querier.*|ruler-query-frontend.*|ruler-query-scheduler.*))\"" + } + ], + "query": "All : cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*cortex|mimir|mimir-backend.*|mimir-read.*|querier.*|query-frontend.*|query-scheduler.*|ruler-querier.*|ruler-query-frontend.*|ruler-query-scheduler.*))\",Main : cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*cortex|mimir|mimir-backend.*|mimir-read.*|querier.*|query-frontend.*|query-scheduler.*))\",Remote ruler : cluster=~\"$cluster\"\\, job=~\"($namespace)/((.*ruler-querier.*|ruler-query-frontend.*|ruler-query-scheduler.*))\"", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Queries", + "uid": "b3abe8d5c040395cc36615cb4334c92d", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-networking.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-networking.json new file mode 100644 index 0000000..71f135b --- /dev/null +++ 
b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-networking.json @@ -0,0 +1,1510 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ 
], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": 
[ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": 
"avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + 
"fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": 
null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + 
"overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", + 
"format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "format": "time_series", + 
"legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 
23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "format": "time_series", + "legendFormat": "limit", + 
"legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads networking", + "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-resources.json 
b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-resources.json new file mode 100644 index 0000000..2ea7e75 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads-resources.json @@ -0,0 +1,2449 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + 
"mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, 
+ "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": 
"bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": 
{ + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + 
"steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] 
+ }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Rules", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + 
}, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + 
}, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) 
(container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"store-gateway\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" 
+ }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(store-gateway).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", persistentvolumeclaim=~\".*(store-gateway).*\"}\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads resources", + "uid": "cc86fd5aa9301c6528986572ad974db9", + 
"version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-reads.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads.json new file mode 100644 index 0000000..b61ed5f --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-reads.json @@ -0,0 +1,4889 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval])) + sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))) + sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Instant queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Range queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Label names queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Label values queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Series queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + 
"value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum 
(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": 
false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + 
"legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (Time in Queue)", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "queries" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__interval]))", + "format": "time_series", + "legendFormat": "Queue length", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "99th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "50th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "Average: {{ additional_queue_dimensions }}", + "refId": "C" + } + ], + "title": "Average Latency by Queue Dimension", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, 
+ "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "Requests/s", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache – query results", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + 
"mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": 
"1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval]))) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum 
(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]))) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Replicas\nThe maximum, and current number of 
querier replicas.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Max .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Current .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Min .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # Add the scaletargetref_name label which is more readable than \"kube-hpa-...\"\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_status_current_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # Add the scaletargetref_name label which is more readable 
than \"kube-hpa-...\"\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # Add the scaletargetref_name label which is more readable than \"kube-hpa-...\"\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (desired replicas)\nThis panel shows the result scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints which are applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\"},\n 
\"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left\n label_replace(label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ), \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\")\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (desired replicas)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(cluster, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", 
\"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier – autoscaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + 
] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 31, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": 
"histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway – key-value store for store-gateways ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n 
thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n 
rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + 
"description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{item_type}}", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – block index cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, 
+ "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n 
name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n 
name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": 
"Memcached – chunks cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 38, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 39, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # 
Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 
1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 40, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n 
component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – metadata cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 41, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": 
"line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 42, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 43, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n 
rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((.*querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – metadata cache (querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 44, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + 
"max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 45, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 46, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { 
+ "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 47, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + 
"refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks object store (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 48, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 49, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 50, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": 
"histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 51, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + 
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 52, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 53, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 54, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 55, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks object store (querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 56, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 57, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 58, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 59, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": 
"label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads", + "uid": "e327503188913dc38ad571c647eef643", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-networking.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-networking.json new file mode 100644 index 0000000..5226fd3 --- /dev/null +++ 
b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-networking.json @@ -0,0 +1,1052 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ 
], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + 
"lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + 
"repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + 
"fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": 
"avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + 
"steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": 
"never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": 
"query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads networking", + "uid": "9e8cfff65f91632f8a25981c6fe44bc9", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-resources.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-resources.json new file mode 100644 index 0000000..1d533e3 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads-resources.json @@ -0,0 +1,982 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + 
"fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": 
"multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + 
"showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", 
+ "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, 
+ "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads resources", + "uid": "1940f6ef765a506a171faa2056c956c3", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads.json new file mode 100644 index 0000000..9b43b3a --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-remote-ruler-reads.json @@ -0,0 +1,1749 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Remote ruler reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": "time_series", + "instant": true, + "refId": "A_classic" + }, + { + "expr": "sum (histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Evaluations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + 
}, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + 
"overrides": [ ] + }, + "id": 4, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]))) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", 
route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [ ], + "options": { + 
"legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (Time in Queue)", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "queries" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", + "format": "time_series", + "legendFormat": "Queue length", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "99th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "50th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "Average: {{ additional_queue_dimensions }}", + "refId": "C" + } + ], + "title": "Average Latency by Queue Dimension", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + 
"value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Replicas\nThe minimum, maximum, and current number of ruler-querier replicas.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Max .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Current .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + 
] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Min .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_status_current_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", 
namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(cluster, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier - autoscaling", + "titleSize": "h6" + }, + { + 
"collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Scaling metric (CPU): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*cpu.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (CPU): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*memory.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (memory): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (in-flight queries): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + 
}, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*queries.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (in-flight queries): Desired replicas", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + 
"multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads", + "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-rollout-progress.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-rollout-progress.json new file mode 100644 index 0000000..a741a8c --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-rollout-progress.json @@ -0,0 +1,1525 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + 
"targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ready" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Updated" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "links": [ ], + "options": { + "barRadius": 0, + "barWidth": 0.96999999999999997, + "fullHighlight": false, + "groupWidth": 0.69999999999999996, + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "horizontal", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "multi", + "sort": "desc" + }, + "xField": "Workload", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", 
\"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + }, + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + } + ], + "title": "Rollout progress", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true + }, + "renameByName": { + "Value #A": "Updated", + "Value #B": "Ready", + "workload": "Workload" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "field": "Workload" + } + ] + } + } + ], + "type": "barchart" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"2.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"4.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + 
"thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + 
"intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\", status_code=~\"5.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, 
+ "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"})) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"})) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + 
}, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 4 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"2.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.01 + }, + { + "color": "red", + "value": 0.050000000000000003 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 4 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": 
"sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"4.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 4 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": 
[ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "sum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\", status_code=~\"5.+\"}[$__rate_interval]))) /\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) < ($latency_metrics * +Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) < ($latency_metrics * -Inf)", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "noValue": "All healthy", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 3, + "w": 10, + "x": 0, + "y": 13 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "text": { + "titleSize": 14, + "valueSize": 14 + }, + "textMode": "value_and_name" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{deployment}}", + "legendLink": null, + "step": null + }, + { + "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{statefulset}}", + "legendLink": null, + "step": null + } + ], + 
"thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Unhealthy pods", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "r.*" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 10, + "y": 8 + }, + "id": 11, + "targets": [ + { + "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods count per version", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "valueLabel": "version" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "container": 1 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { }, + "sort": [ + { + "field": "container" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "writes", + "legendLink": null + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "writes", + "legendLink": null + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "reads", + "legendLink": null + }, + { + "expr": "1 - (\n 
avg_over_time(histogram_quantile(0.99, sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "reads", + "legendLink": null + } + ], + "title": "Latency vs 24h ago", + "type": "timeseries" + } + ], + "refresh": "5m", + "rows": null, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", 
+ "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Rollout progress", + "uid": "7f0b5567d543a1698e695b530eb7f5de", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-ruler.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-ruler.json new file mode 100644 index 0000000..e25ff86 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-ruler.json @@ -0,0 +1,2646 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Active configurations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, 
+ "title": "Total rules", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Reads from ingesters - RPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Reads from ingesters - RPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "reqps", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Writes to ingesters - RPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "success", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "missed", + "legendLink": null + } + ], + "title": "Evaluations per second", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "average", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule evaluations global", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": 
[ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + 
"lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": 
null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes (ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads (ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + 
"mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + 
"defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, 
+ "title": "Ruler - key-value store for rulers ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of store-gateways hit per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + 
}, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Refetches of missing blocks per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failures / sec" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Failures / sec", + "legendLink": null + } + ], + "title": "Consistency checks failed", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler - blocks storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Delivery errors", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"})\n /\nsum by(user) (cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}) > 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + }, + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Dropped", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Missed iterations", + 
"type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "title": "Failures", + "type": "timeseries" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Group evaluations", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule evaluation per user", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler configuration object store (ruler accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + 
}, + "id": 27, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + 
"id": 28, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + 
"overrides": [ ] + }, + "id": 29, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + 
}, + "overrides": [ ] + }, + "id": 30, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": 
"datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Ruler", + "uid": 
"631e15d5d85afb2ca8e35d62984eeaa0", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-scaling.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-scaling.json new file mode 100644 index 0000000..4dbb7bd --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-scaling.json @@ -0,0 +1,392 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "id": 1, + "options": { + "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", + "mode": "markdown" + }, + "span": 12, + "title": "", + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Service scaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "400px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Required Replicas" + }, + { + "id": "decimals", + "value": 0 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "__name__" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "deployment" + }, + "properties": [ + { + "id": "displayName", + "value": "Service" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "displayName", + "value": "Namespace" + }, + { + "id": "decimals", + "value": 2 + }, + 
{ + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "reason" + }, + "properties": [ + { + "id": "displayName", + "value": "Reason" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + } + ] + }, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Workload-based scaling", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Scaling", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + 
"value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Scaling", + "uid": "64bbad83507b7289b514725658e10352", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-slow-queries.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-slow-queries.json new file mode 100644 index 0000000..76e9a10 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-slow-queries.json @@ -0,0 +1,1487 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + 
}, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} 
| unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { 
+ "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ]
+ }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Across tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | 
response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | 
logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, 
{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", +
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 
fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", +
"legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2,
+ "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "fetched_chunk_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_index_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "response_size_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_hit_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_miss_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "estimated_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_chunks_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "Time span" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "queue_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "query_wall_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "height": "500px", + "id": 19, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"$component.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Line": true, + "Time": true, + "caller": true, + "cluster": true, + "component": true, + "container": true, + "gossip_ring_member": true, + "host": true, + "id": true, + "job": true, + "labels": true, + "length": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "param_step": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "response_time": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "err": 10, + 
"length_seconds": 3, + "param_end": 5, + "param_query": 8, + "param_start": 4, + "param_step_seconds": 7, + "param_time": 6, + "response_time_seconds": 9, + "status": 1, + "ts": 0, + "user": 2 + }, + "renameByName": { + "err": "Error", + "length_seconds": "Time span", + "param_end": "End", + "param_query": "Query", + "param_start": "Start", + "param_step_seconds": "Step", + "param_time": "Time (instant query)", + "response_time_seconds": "Duration", + "ts": "Completion date", + "user": "Tenant ID" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "sharded_queries" + }, + { + "destinationType": "number", + "targetField": "split_queries" + }, + { + "destinationType": "number", + "targetField": "fetched_chunk_bytes" + }, + { + "destinationType": "number", + "targetField": "fetched_index_bytes" + }, + { + "destinationType": "number", + "targetField": "response_size_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_hit_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_miss_bytes" + }, + { + "destinationType": "number", + "targetField": "estimated_series_count" + }, + { + "destinationType": "number", + "targetField": "fetched_chunks_count" + }, + { + "destinationType": "number", + "targetField": "fetched_series_count" + }, + { + "destinationType": "number", + "targetField": "Time span" + }, + { + "destinationType": "number", + "targetField": "Duration" + }, + { + "destinationType": "number", + "targetField": "Step" + }, + { + "destinationType": "number", + "targetField": "queue_time_seconds" + }, + { + "destinationType": "number", + "targetField": "query_wall_time_seconds" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": 
{ + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "includeAll": false, + "label": "Loki data source", + "multi": false, + "name": "loki_datasource", + "query": "loki", + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "5s", + "value": "5s" + }, + "hide": 0, + "label": "Min duration", + "name": "min_duration", + "options": [ + { + "selected": true, + "text": "5s", + "value": "5s" + } + ], + "query": "5s", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": 
true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": "query-frontend", + "value": "query-frontend" + }, + "label": "Component", + "multi": false, + "name": "component", + "options": [ + { + "selected": true, + "text": "query-frontend", + "value": "query-frontend" + }, + { + "selected": false, + "text": "ruler-query-frontend", + "value": "ruler-query-frontend" + } + ], + "query": "query-frontend, ruler-query-frontend", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Slow queries", + "uid": "6089e1ce1e678788f46312a0a1e647e6", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-tenants.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-tenants.json new file mode 100644 index 0000000..e90d330 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-tenants.json @@ -0,0 +1,2744 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ + { + "datasource": "$datasource", + "enable": true, + "expr": "sum by (user) (cortex_ingester_active_series_loading{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}) > 0", + "filter": { + "exclude": false, + "ids": [ + 2, + 7, + 8 + ] + }, + "hide": true, + "iconColor": "yellow", + "name": "Active Series Reload", + "titleFormat": "Active series reloading for user {{user}}" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external 
link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n)\n unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n)\n,\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "in-memory", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n 
cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "owned", + "legendLink": null + }, + { + "expr": "( # Classic storage\n sum by (cluster, namespace, name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, name) (\n max by (ingester_id, cluster, namespace, name) (\n label_replace(\n 
cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null + } + ], + "title": "All series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", 
user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory series per ingester", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": 
"{{pod}}", + "legendLink": null + } + ], + "title": "Owned series per ingester", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant series counts", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "series", + 
"legendLink": null + } + ], + "title": "Series with exemplars", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Oldest exemplar age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + 
"overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + { + "expr": "( # Classic storage\n sum by (cluster, namespace, name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, name) (\n max by (ingester_id, cluster, namespace, name) (\n label_replace(\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null + } + ], + "title": "Native histogram series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on 
(cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "buckets", + "legendLink": null + }, + { + "expr": "( # Classic storage\n sum by (cluster, namespace, name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, name) (\n max by (ingester_id, cluster, namespace, name) (\n label_replace(\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "buckets ({{ name }})", + "legendLink": null + } + ], + "title": "Total number of buckets used by native histogram series", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars and native histograms", + "titleSize": "h6" + }, + { + "collapse": false, + 
"height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor requests incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor requests received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": 
[ + { + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor requests received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Newest seen sample age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded requests rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor ingestion requests", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor samples incoming rate\nThe rate of samples that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": 
"time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor samples incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor samples received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples 
for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "deduplicated", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "non-HA", + "legendLink": null + } + ], + "title": "Distributor deduplicated/non-HA", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\nThis doesn't account for the replication factor.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + 
"expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (distributor)", + "legendLink": null + }, + { + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (ingester, replicated)", + "legendLink": null + } + ], + "title": "Distributor and ingester discarded samples rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Out-of-order samples appended\nThe rate of OOO samples that have been appended.\nThis doesn't account for the replication factor.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_out_of_order_samples_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Out-of-order samples appended", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + 
"datasource": "$datasource", + "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": 
"sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded exemplars rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\nThis doesn't account for the replication factor.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]) unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]),\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Ingester appended exemplars rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Symbol table size for loaded blocks", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Space used by local blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingesters' storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Number of groups\nTotal number of rule 
groups for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "time_series", + "legendFormat": "groups", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((.*overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Number of groups", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": 
[ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "rules", + "legendLink": null + } + ], + "title": "Number of rules", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Total evaluations rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "topk(50, sum by (rule_group) 
(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0)", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "title": "Failed evaluations rate (top 50 rule groups)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rules", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "rules" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + } + ] + }, + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit 
biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "seconds" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + } + ] + }, + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + 
"sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top rules", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Sent notifications rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "rate" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Failed notifications rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null + }, + { + "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null + } + ], + "title": "Alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": 
{ + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 33, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + 
"mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ 
], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + "title": "Rate of Read Requests - query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((.*query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (User)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + 
"overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + "title": "Rate of Read Requests - ruler-query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 38, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler-query-scheduler.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - ruler-query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (Ruler)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. 
When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 50, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 39, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 40, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compactions", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + 
"type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "user", + "multi": false, + "name": "user", + "options": [ ], + "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "label": "limit", + "multi": false, + "name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + }, + { + "selected": false, + "text": "500", + "value": "500" + }, + { + "selected": false, + "text": "1000", + "value": "1000" + } + ], + "query": "10 : 10,50 : 50,100 : 100,500 : 500,1000 : 1000", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Tenants", + "uid": "35fa247ce651ba189debf33d7ae41611", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-top-tenants.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-top-tenants.json new file mode 100644 index 0000000..988d722 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-top-tenants.json @@ -0,0 +1,1742 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + 
} + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows the top tenants based on multiple selection criteria.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the number of users to be shown.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Top tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "series" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, ( # Classic storage\n sum by (cluster, namespace, user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by active series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By active series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "series" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": 
"unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, ( # Classic storage\n sum by (cluster, namespace, user) (\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n)\n unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n)\n,\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n)", + "format": 
"table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by in-memory series (series created - series removed)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "(( # Classic storage\n sum by (cluster, namespace, user) (\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n)\n unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) 
group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} \n)\n,\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n)\nand\ntopk($limit,\n (\n ( # Classic storage\n sum by (cluster, namespace, user) (\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ end()\n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ end()\n)\n unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ end()\n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ end()\n)\n,\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n\n )\n -\n (\n ( # Classic storage\n sum by (cluster, namespace, user) (\n (\n 
cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ start()\n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ start()\n)\n unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ start()\n -\n cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} @ start()\n)\n,\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "samples/s" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate", + "titleSize": "h6" + }, + { + "collapse": true, + 
"height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "samples/s" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by discarded samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate", + "titleSize": "h6" + }, + { + 
"collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": 
"displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "series" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, ( # Classic storage\n sum by (cluster, namespace, user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, user) (\n max by (ingester_id, cluster, namespace, user) (\n label_replace(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n 
)\n)\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by series with exemplars", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By series with exemplars", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "exemplars/s" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received exemplars rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By exemplars rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "rules" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", 
+ "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group size", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "seconds" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ruler|cortex|mimir|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group evaluation time", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + 
"panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Compaction Jobs" + }, + { + "id": "decimals", + "value": 0 + }, + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "displayName", + "value": "user" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by estimated compaction jobs from bucket-index", + "tooltip": { + "shared": false, + "sort": 0, + 
"value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By estimated compaction jobs from bucket-index", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "label": "limit", + "multi": false, + 
"name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "10 : 10,50 : 50,100 : 100", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Top tenants", + "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-networking.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-networking.json new file mode 100644 index 0000000..78a92bd --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-networking.json @@ -0,0 +1,823 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" 
+ }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "format": "time_series", + 
"legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": 
"{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingress TCP connections (per pod)\nThe number of ingress TCP connections (HTTP and gRPC protocol).\n", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + 
"fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Ingress TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": 
"", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes networking", + "uid": "978c1cb452585c96697a238eaac7fe2d", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-resources.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-resources.json new file mode 100644 index 0000000..b96e907 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes-resources.json @@ -0,0 +1,1186 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + 
"id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) 
(container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + 
"format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + 
"overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, 
+ "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(ingester).*\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(ingester).*\"}\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + 
"hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes resources", + "uid": "bc9160e50b52e89e0e49c840fea3d379", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/mimir/mimir-writes.json b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes.json new file mode 100644 index 0000000..fbf3c13 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/mimir/mimir-writes.json @@ -0,0 +1,3344 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "5m", + "rows": [ + { + "collapse": false, + "height": "125px", + "panels": [ + { + "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Writes dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Samples / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped 
exemplars, but not necessarily ingested by the ingesters.\n\n", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Exemplars / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\nWith classic storage the sum of series from all ingesters is divided by the replication factor.\nWith ingest storage we take the maximum series of each ingest partition.\n\n", + "fill": 1, + "format": "short", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], +
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "In-memory series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\nWith classic storage we the sum of exemplars from all ingesters is divided by 
the replication factor.\nWith ingest storage we take the maximum exemplars of each ingest partition.\n\n", + "fill": 1, + "format": "short", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Exemplars in ingesters", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT 
included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { 
+ "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + "expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + 
"targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) 
/\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))) < 
($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { 
+ "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A_classic" + }, + { + 
"expr": "sum by (status) (\n label_replace(label_replace(histogram_count(rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_classic" + }, + { + "expr": "histogram_quantile(0.99, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A_native" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3 < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_classic" + }, + { + "expr": "histogram_quantile(0.50, sum (cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3 < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B_native" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) /\nsum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})\n < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_classic" + }, + { + "expr": "1e3 * sum(histogram_sum(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) /\nsum(histogram_count(cluster_job_route:cortex_request_duration_seconds:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}))\n < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "Average", + "refId": "C_native" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (le,pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval]))) < ($latency_metrics * +Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by (pod) (rate(cortex_request_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Replicas\nThe minimum, maximum, and current number of distributor replicas.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Max .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + 
"value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Current .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Min .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_status_current_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\", 
horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # Add the scaletargetref_name label for readability\n + on (cluster, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (CPU): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*cpu.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": 
null + } + ], + "title": "Scaling metric (CPU): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*memory.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (memory): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. 
Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(cluster, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor – autoscaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + 
"unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - key-value store for high-availability (HA) deduplication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } 
+ ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, 
+ "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - key-value store for distributors ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - key-value store for the ingesters ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - 
sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Uploaded blocks / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": 
"time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Upload latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester – shipper", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. 
Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Compactions / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 
1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Compactions latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester – TSDB head", + "titleSize": "h6" + }, + { + 
"collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "WAL truncations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Checkpoints created per 
second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Checkpoints created / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for 
the checkpointing to complete.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + } + ], + "title": "WAL truncations latency (includes checkpointing)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "WAL" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mmap-ed chunks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E28A42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "WAL", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "mmap-ed chunks", + "legendLink": null + } + ], + "title": "Corruptions / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester – TSDB write ahead log (WAL)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "incoming exemplars", + "legendLink": null + } + ], + "title": "Distributor exemplars incoming rate", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be noticeably lower than the incoming rate because we dedupe the HA-sent exemplars and then reject based on time; see `cortex_discarded_exemplars_total` for the rates of specific rejection reasons.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "received exemplars", + "legendLink": null + } + ], + "title": "Distributor exemplars received rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is replicated to a number of ingesters. 
With classic storage, the sum of rates from all ingesters is divided by the replication factor.\nWith ingest storage we take the maximum rate of each ingest partition.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "ingested exemplars", + "legendLink": null + } + ], + "title": "Ingester ingested exemplars rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester appended exemplars rate\nThe rate of exemplars 
appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "( # Classic storage\n sum by (cluster, namespace, ) (\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"} unless on (job)\n cortex_partition_ring_partitions{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}\n )\n / on (cluster, namespace) group_left()\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"})\n)\nor\n( # Ingest storage\n sum by (cluster, namespace, ) (\n max by (ingester_id, cluster, namespace, ) (\n label_replace(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"},\n \"ingester_id\", \"$1\", \"pod\", \".*-([0-9]+)$\"\n )\n )\n )\n)\n", + "format": "time_series", + "legendFormat": "appended exemplars", + "legendLink": null + } + ], + "title": "Ingester appended exemplars rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + 
"panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected distributor requests", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((.*ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected ingester requests", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Instance Limits", + "titleSize": "h6" + } + 
], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", 
+ "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes", + "uid": "8280707b8f16e7b87b840fc1cc92d4c5", + "version": 0 + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/tempo/tempo-operational.json b/charts/meta-monitoring/src/dashboards/tempo/tempo-operational.json new file mode 100644 index 0000000..5e811d1 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-operational.json @@ -0,0 +1,7592 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + + ], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "uid": "$logsds" + }, + "enable": true, + "expr": "{cluster=\"$cluster\", diff_namespace=\"$namespace\", container=\"kube-diff-logger\"}", + "hide": true, + "iconColor": "rgba(255, 96, 96, 1)", + "name": "diffs", + "showIn": 0, + "target": { + + } + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 38, + "panels": [ + + ], + "title": "General", + "type": "row" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "gcs", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", 
+ "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "go heap", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "goroutines", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 5, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "cpu", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 43, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "working set", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "rx-{{pod}}", + "refId": "A" + }, + { + "expr": "rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "tx-{{pod}}", + "refId": "B" + } + ], + "title": "tx/rx", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 18, + "y": 1 + }, + "id": 45, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "kubelet_volume_stats_available_bytes{cluster=\"$cluster\", namespace=\"$namespace\", persistentvolumeclaim=~\"$component.*\"}", + "legendFormat": "{{persistentvolumeclaim}}", + "refId": "A" + } + ], + "title": "data volume free", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + 
"tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", app=~\"$component.*\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{exported_pod}}", + "refId": "A" + } + ], + "title": "bad words", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 49, + "panels": [ + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 7 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "tempodb_work_queue_length{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"} / 
tempodb_work_queue_max{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "%age total work queue", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 7 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(increase(tempodb_compaction_errors_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (job)", + "legendFormat": "compaction_err", + "refId": "B" + }, + { + "expr": "sum(increase(tempodb_retention_errors_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (job)", + "legendFormat": "retention_err", + "refId": "C" + }, + { + "expr": 
"sum(increase(tempodb_blocklist_poll_errors_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (job)", + "legendFormat": "blocklist_err", + "refId": "D" + } + ], + "title": "maintenance errors", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 7 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75324", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempodb_blocklist_poll_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempodb_blocklist_poll_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempodb_blocklist_poll_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempodb_blocklist_poll_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempodb_blocklist_poll_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * -Inf)", + "hide": false, + "legendFormat": ".9", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempodb_blocklist_poll_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (le)) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".5", + "range": 
true, + "refId": "F" + } + ], + "title": "Blocklist Poll Duration", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 7 + }, + "id": 47, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "avg(tempodb_blocklist_length{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}) by (tenant)", + "instant": false, + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Blocklist Length", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", 
+ "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 7 + }, + "id": 51, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempodb_retention_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempodb_retention_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, 
sum(rate(tempodb_retention_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval])) by (le)) < ($latency_metrics * +Inf)", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempodb_retention_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempodb_retention_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempodb_retention_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval])) ) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "F" + } + ], + "title": "retention duration", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + 
}, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 15, + "y": 7 + }, + "id": 53, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(increase(tempodb_retention_deleted_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "deleted", + "refId": "A" + }, + { + "expr": "sum(increase(tempodb_retention_marked_for_deletion_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "marked_for_deletion", + "refId": "B" + } + ], + "title": "retention", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + 
"spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 18, + "y": 7 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Container Restarts", + "type": "timeseries" + } + ], + "title": "Maintenance", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 21, + "panels": [ + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\".*api_traces_traceid\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Queries/Sec (cortex-gw)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 3 + }, + "id": 78, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C" + } + ], + "title": "Query Latency (Gateway)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } 
+ ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "id": 97, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\".*api_search.*\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A", + "stepMode": "min" + } + ], + "title": "Search Queries/Sec (cortex-gw)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 3 + }, + "id": 93, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "", + "legendFormat": ".9", + "refId": "B", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "", + "legendFormat": ".5", + "refId": "C", + "stepMode": "min" + } + ], + "title": "Search Query Latency (Gateway)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 90, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", route=~\".*api_traces_traceid\", job=\"$namespace/query-frontend\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Queries/Sec (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 8 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C" + } + ], + "title": "Query Latency (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 8 + }, + "id": 98, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", route=~\".*api_search.*\", job=\"$namespace/query-frontend\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A", + "stepMode": "min" + } + ], + "title": "Search Queries/Sec (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 8 + }, + "id": 94, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C", + "stepMode": "min" + } + ], + "title": "Search Query Latency (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 13 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\"querier_.*api_traces_traceid\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Queries/Sec (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 13 + }, + "id": 89, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_traces_traceid\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C" + } + ], + "title": "Query Latency (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": 
"short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 13 + }, + "id": 99, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\"querier_.*api_search.*\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A", + "stepMode": "min" + } + ], + "title": "Search Queries/Sec (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 13 + }, + "id": 95, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + 
"pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C", + "stepMode": "min" + } + ], + "title": "Search Query Latency (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 18 + }, + "id": 92, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{route=\"/tempopb.Querier/FindTraceByID\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Queries/Sec (Ingester)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 18 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + 
} + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=\"/tempopb.Querier/FindTraceByID\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=\"/tempopb.Querier/FindTraceByID\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=\"/tempopb.Querier/FindTraceByID\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C" + } + ], + "title": "Query Latency (Ingester)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + 
"gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 18 + }, + "id": 100, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\"/tempopb.Querier/Search.*\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A", + "stepMode": "min" + } + ], + "title": "Search Queries/Sec (Ingester)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 18 + }, + "id": 96, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + 
"targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Querier/Search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Querier/Search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B", + "stepMode": "min" + }, + { + "exemplar": true, + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Querier/Search.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C", + "stepMode": "min" + } + ], + "title": "Search Query Latency (Ingester)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 23 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\".*api_metrics_summary\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "Metrics Summary QPS (cortex-gw)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 23 + }, + "id": 103, + "options": { + 
"legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_metrics_summary\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_metrics_summary\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\".*api_metrics_summary\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + } + ], + "title": "Metrics Summary Latency (Gateway)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 23 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", route=~\".*api_metrics.*\", job=\"$namespace/query-frontend\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "range": true, + "refId": "A", + "stepMode": "min" + } + ], + "title": "Metrics Summary QPS (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 23 + }, + "id": 111, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A", + "stepMode": "min" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B", + "stepMode": "min" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/query-frontend\", route=~\".*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "range": true, + 
"refId": "C", + "stepMode": "min" + } + ], + "title": "Metrics Summary Latency (Query Frontend)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 28 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "sum(rate(tempo_request_duration_seconds_count{route=~\"querier_.*api_metrics.*\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}} {{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Metrics Summary QPS (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, 
+ "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 28 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": 
".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", route=~\"querier_.*api_metrics.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + } + ], + "title": "Metrics Summary Latency (Querier)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 28 + }, + "id": 106, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": 
"sum(rate(tempo_request_duration_seconds_count{route=\"/tempopb.MetricsGenerator/GetMetrics\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\"}[$__rate_interval])) by (status_code)", + "hide": false, + "interval": "", + "legendFormat": "{{status_code}}", + "range": true, + "refId": "A" + } + ], + "title": "GetMetrics QPS (Metrics Generator)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 28 + }, + "id": 107, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", 
route=\"/tempopb.MetricsGenerator/GetMetrics\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", route=\"/tempopb.MetricsGenerator/GetMetrics\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", route=\"/tempopb.MetricsGenerator/GetMetrics\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + } + ], + "title": "GetMetrics Latency (Metrics Generator)", + "type": "timeseries" + } + ], + "title": "Read", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 19, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 9 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "editorMode": "code", + "expr": "sum by(cluster) (rate(tempo_distributor_spans_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Spans Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + 
"overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 9 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "builder", + "expr": "sum by(cluster) (rate(tempo_ingester_traces_created_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Traces Created", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": 
[ + { + "expr": "sum(increase(tempo_ingester_blocks_flushed_total{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"}[1h]))", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Blocks Flushed (1h)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 9 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "expr": "increase(tempo_ingester_failed_flushes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]) > 0", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Failed Flushes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 9 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "expr": "sum(increase(tempo_ingester_blocks_cleared_total{cluster=\"$cluster\", namespace=\"$namespace\"}[1h]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Blocks Cleared (1h)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 15, + "y": 9 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_ingester_flush_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_ingester_flush_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_ingester_flush_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "refId": "C" + } + ], + "title": "Flush Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 18, + "y": 9 + }, + "id": 114, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "sum(tempo_distributor_queue_length{cluster=~\"$cluster\", namespace=~\"$namespace\", job=~\".*/distributor\"}) by (name)", + "interval": "", + "legendFormat": "{{name}}", + "range": true, + "refId": "A" + } + ], + "title": "Distributor Queue Length", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 2, + "x": 21, + "y": 9 + }, + "id": 115, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "sum(rate(tempo_distributor_metrics_generator_pushes_failures_total{namespace=~\"$namespace\", cluster=~\"$cluster\"}[1m])) by (namespace, cluster)", + "interval": "", + "legendFormat": "{{cluster}} {{namespace}}", + "range": true, + "refId": "A" + } + ], + "title": "Pushes Failing (Distributor)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 10, + "w": 7, + "x": 0, + "y": 14 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (status_code)", + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "title": "Pushes/sec (gateway)", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 7, + "y": 14 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": 
"accepted", + "refId": "A" + }, + { + "expr": "sum(rate(tempo_receiver_refused_spans{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "refused", + "refId": "B" + } + ], + "title": "OTEL Distributor Spans/second", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 13, + "y": 14 + }, + "id": 79, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le)) > 
($latency_metrics * -Inf)", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le)) > ($latency_metrics * -Inf)", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le)) > ($latency_metrics * -Inf)", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".9", + 
"range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))) < ($latency_metrics * -Inf)", + "hide": false, + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "F" + } + ], + "title": "Push Latency (Gateway)", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 13, + "y": 19 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": 
"code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir-ops-03" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le))", + "hide": false, + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + } + ], + "title": "Push Latency (Ingester)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], 
+ "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 7, + "y": 24 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "sum(rate(tempo_metrics_generator_spans_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "accepted", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "sum(rate(tempo_metrics_generator_spans_discarded_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (reason)", + "interval": "", + "legendFormat": "refused {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Generator Spans/second", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 13, + "y": 24 + }, + "id": 108, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d452322apre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", route=~\"/tempopb.MetricsGenerator/PushSpans\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.9, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", route=~\"/tempopb.MetricsGenerator/PushSpans\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".9", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "cortex-ops-01" + }, + "editorMode": "code", + "expr": "histogram_quantile(.5, sum(rate(tempo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/metrics-generator\", route=~\"/tempopb.MetricsGenerator/PushSpans\"}[$__rate_interval])) by (le))", + "interval": "", + "legendFormat": ".5", + "range": true, + "refId": "C" + } + ], + "title": "Push Latency (Generator)", + "type": "timeseries" + } + ], + "title": "Write", + "type": "row" + }, + { + "collapsed": true, + 
"datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 74, + "panels": [ + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempo_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, method)", + "interval": "", + "legendFormat": "{{status_code}}-{{method}}", + "refId": "A" + } + ], + "title": "Requests/Second", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 76, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (method, le))", + "interval": "", + "legendFormat": ".99-{{method}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (method, le))", + "interval": "", + "legendFormat": ".9-{{method}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (method, le))", + "interval": "", + "legendFormat": ".5-{{method}}", + "refId": "C" + } + ], + "title": "Latency By Operation", + "type": "timeseries" + } + ], + "title": "Memcached", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 28, + "panels": [ + { + "datasource": { + 
"uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempodb_backend_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, operation)", + "interval": "", + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "title": "Requests/Second", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + 
"stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "title": "Latency By Operation", + "type": "timeseries" + } + ], + "title": "Backend", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 56, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 17 + }, + "id": 58, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "expr": "gauge_memberlist_health_score{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Gossip Health", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 17 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "tempo_memberlist_client_cluster_node_health_score{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Node Health", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 17 + }, + "id": 63, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "min(tempo_memberlist_client_cluster_members_count{cluster=\"$cluster\", namespace=\"$namespace\", 
job=~\"$namespace/$component\"})", + "interval": "", + "legendFormat": "min", + "refId": "A" + }, + { + "expr": "max(tempo_memberlist_client_cluster_members_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"})", + "interval": "", + "legendFormat": "max", + "refId": "B" + } + ], + "title": "Member Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 17 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "expr": "min(tempo_memberlist_client_kv_store_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"})", + "interval": "", + "legendFormat": "min", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "expr": "max(tempo_memberlist_client_kv_store_count{cluster=\"$cluster\", 
namespace=\"$namespace\", job=~\"$namespace/$component\"})", + "interval": "", + "legendFormat": "max", + "refId": "B" + } + ], + "title": "KV Store Count", + "type": "timeseries" + } + ], + "title": "Ring/Memberlist", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 69, + "panels": [ + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 77, + "options": { + "graph": { + + }, + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.0-14141pre", + "targets": [ + { + "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (error) / ignoring (error) group_left sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{error}}", + "refId": "A" + } + ], + 
"title": "Vulture Query Errors", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 67, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "text": { + + }, + "valueMode": "color" + }, + "pluginVersion": "11.3.0-75324", + "targets": [ + { + "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (error) / ignoring (error) group_left sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{secondsago}}", + "refId": "A" + } + ], + "title": "Average Vulture Query Errors", + "type": "bargauge" + } + ], + "title": "Vulture", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 81, + "panels": [ + { + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 19 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempodb_compaction_objects_combined_total{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/compactor\"}[$__rate_interval])) by (level)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Objects Combined / s", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$ds" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 19 + }, + "id": 85, + "options": { + "legend": { + 
"calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "expr": "sum(rate(tempodb_compaction_objects_written_total{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/compactor\"}[$__rate_interval])) by (level)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Objects Written / s", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 19 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "editorMode": "builder", + "expr": "sum(rate(tempodb_compaction_bytes_written_total{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/compactor\"}[$__rate_interval])) by (level)", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + 
"title": "Bytes Written / s", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 19 + }, + "id": 86, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0-d373beebpre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P666011C0B63BDCA4" + }, + "editorMode": "code", + "expr": "sum(increase(tempodb_compaction_blocks_total{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/compactor\"}[5m])) by (level)", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Blocks Compacted", + "type": "timeseries" + } + ], + "title": "Compactor", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "ds", + "options": [ + + ], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + 
"skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "loki-ops", + "value": "loki-ops" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "logsds", + "options": [ + + ], + "query": "loki", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "ops-eu-south-0", + "value": "ops-eu-south-0" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "definition": "label_values(tempo_build_info, cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": { + "query": "label_values(tempo_build_info, cluster)", + "refId": "ops-cortex-cluster-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "tempo-ops-01", + "value": "tempo-ops-01" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "definition": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": { + "query": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refId": "ops-cortex-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "multi": false, + "name": "component", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "compactor", + "value": "compactor" + }, + { + "selected": false, + "text": "distributor", + "value": "distributor" + 
}, + { + "selected": false, + "text": "ingester", + "value": "ingester" + }, + { + "selected": false, + "text": "metrics-generator", + "value": "metrics-generator" + }, + { + "selected": false, + "text": "query-frontend", + "value": "query-frontend" + }, + { + "selected": false, + "text": "querier", + "value": "querier" + }, + { + "selected": false, + "text": "cortex-gw", + "value": "cortex-gw" + }, + { + "selected": false, + "text": "cortex-gw-internal", + "value": "cortex-gw-internal" + } + ], + "query": "compactor,distributor,ingester,metrics-generator,query-frontend,querier,cortex-gw,cortex-gw-internal", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": "native", + "value": "-1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": true, + "text": "native", + "value": "-1" + }, + { + "selected": false, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Tempo Operational", + "uid": "a6175b9cc7ec20591890117c39580030", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/tempo/tempo-reads.json b/charts/meta-monitoring/src/dashboards/tempo/tempo-reads.json new file mode 100644 index 0000000..2564b7e --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-reads.json @@ -0,0 +1,1612 @@ +{ + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + 
"hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", 
+ "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 
Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + 
"timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / 
sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval]),\n 
\"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + 
"step": 10 + }, + { + "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier External Endpoint", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + 
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", 
+ "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + 
"titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(tempo_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ + + ], + "query": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Tempo / Reads", + "uid": "", + "version": 0 +} \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/tempo/tempo-resources.json b/charts/meta-monitoring/src/dashboards/tempo/tempo-resources.json new file mode 100644 index 0000000..57a6ff7 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-resources.json @@ -0,0 +1,2333 @@ +{ + 
"annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": 
"request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + 
"legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + 
"fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", 
+ "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", 
+ "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Metrics-generator", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
job=~\"($namespace)/query-frontend\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"compactor\", resource=\"cpu\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + }, + { + "alias": "request", + "color": "#FCE300", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"compactor\", resource=\"memory\"} > 0)", + "format": "time_series", + "interval": "1m", + "legendFormat": "request", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compactor", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(tempo_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ + + ], + "query": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Tempo / Resources", + "uid": "", + "version": 0 +} \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/tempo/tempo-rollout-progress.json 
b/charts/meta-monitoring/src/dashboards/tempo/tempo-rollout-progress.json new file mode 100644 index 0000000..3481773 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-rollout-progress.json @@ -0,0 +1,1555 @@ +{ + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "yellow", + "value": null + }, + { + "color": "yellow", + "value": 0.999 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "percentunit" + } + }, + "fill": 1, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "options": { + "displayMode": "basic", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum by(tempo_service) (\n label_replace(\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", 
namespace=~\"$namespace\",statefulset=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"},\n \"tempo_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n /\n sum by(tempo_service) (\n label_replace(\n kube_statefulset_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"tempo_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n) and (\n sum by(tempo_service) (\n label_replace(\n kube_statefulset_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"tempo_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n > 0\n)\n", + "format": null, + "intervalFactor": null, + "legendFormat": "{{tempo_service}}", + "legendLink": null, + "step": null + }, + { + "expr": "(\n sum by(tempo_service) (\n label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\",deployment=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"},\n \"tempo_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n /\n sum by(tempo_service) (\n label_replace(\n kube_deployment_spec_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"tempo_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n) and (\n sum by(tempo_service) (\n label_replace(\n kube_deployment_spec_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"tempo_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n > 0\n)\n", + "format": null, + "intervalFactor": null, + "legendFormat": "{{tempo_service}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rollout progress", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "bargauge", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 2xx", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, 
+ "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.2 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 4xx", + "tooltip": { + "shared": true, + "sort": 2, + 
"value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Writes - 5xx", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.2 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Writes 99th latency", + "tooltip": { + "shared": true, + "sort": 
2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 4 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 2xx", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": 
"stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.01 + }, + { + "color": "red", + "value": 0.05 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 4 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 4xx", + "tooltip": { + "shared": true, + "sort": 2, + 
"value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 4 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 5xx", + "tooltip": { + "shared": true, + "sort": 2, + 
"value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Reads 99th latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "noValue": "All healthy", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [ + + ] + }, + "fill": 1, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 8 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "options": { + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\", deployment=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{deployment}}", + "legendLink": null, + "step": null + }, + { + "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\", statefulset=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", 
namespace=~\"$namespace\", statefulset=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{statefulset}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Unhealthy pods", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "r.*" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 10, + "y": 8 + }, + "id": 11, + "targets": [ + { + "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\".*(cortex-gw|distributor|ingester|query-frontend|querier|compactor|metrics-generator).*\"},\n \"version\", \"$1\", \"image\", \".*:(.+)-.*\"\n )\n)\n", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods count per version", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "valueLabel": "version" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { + + }, + "sort": [ + { + "field": "container" + } + ] + } + } + ], + "type": "table" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "writes", + "legendLink": null + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "reads", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency vs 24h ago", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + 
"yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "", + "rows": null, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(tempo_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ + + ], + "query": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Rollout progress", + "uid": "217b16a8c5966b32c770225dea289b19", + "version": 0 +} \ No newline at end of file diff --git 
a/charts/meta-monitoring/src/dashboards/tempo/tempo-tenants.json b/charts/meta-monitoring/src/dashboards/tempo/tempo-tenants.json new file mode 100644 index 0000000..87387ed --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-tenants.json @@ -0,0 +1,1149 @@ +{ + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n) by (limit_name)\n", + "format": "table", + "instant": true, + "legendFormat": "", + 
"refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Limits", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant info", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "dashes": true, + "fill": 0 + }, + { + "alias": "burst limit", + "dashes": true, + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "received", + "legendLink": null + }, + { + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) 
(tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "burst limit", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Distributor bytes/s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", 
job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "accepted", + "legendLink": null + }, + { + "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", + "format": "time_series", + "interval": "1m", + "legendFormat": "refused {{ reason }}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Distributor spans/s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "global limit", + "dashes": true, + "fill": 0 + }, + { + "alias": "local limit", + "dashes": true, + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",tenant=\"$tenant\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "live traces", + "legendLink": null + }, + { + "expr": "max(\n max by (cluster, namespace, limit_name) 
(tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "global limit", + "legendLink": null + }, + { + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "local limit", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Live traces", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingestion", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, 
+ "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{ status }}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Queries/s (ID lookup)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{ status }}", + "legendLink": null + } + ], + 
"thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Queries/s (search)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "length", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Blockslist length", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + 
}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "blocks", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Outstanding compactions", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "rate", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes/s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "dashes": true, + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"})", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{ tenant }}", + "legendLink": null + }, + { + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Active series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Metrics generator", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(tempo_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ + + ], + "query": 
"label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "tenant", + "multi": false, + "name": "tenant", + "options": [ + + ], + "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}, tenant)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Tempo / Tenants", + "uid": "", + "version": 0 +} \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/tempo/tempo-writes.json b/charts/meta-monitoring/src/dashboards/tempo/tempo-writes.json new file mode 100644 index 0000000..5d9b29b --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/tempo/tempo-writes.json @@ -0,0 +1,1732 @@ +{ + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "tempo" + ], + "targetBlank": false, + "title": "Tempo Dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": 
"#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{grpc_status}}", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + 
"overrides": [ + + ] + }, + "id": 4, + "options": { + "content": "Visit [Status codes and their use in gRPC](https://github.com/grpc/grpc/blob/master/doc/statuscodes.md)\n\nCode | Number | Description\n---|---|---\nOK | 0 | Not an error; returned on success.\nCANCELLED | 1 | The operation was cancelled, typically by the caller.\nUNKNOWN | 2 | Unknown error. For example, this error may be returned when a Status value received from another address space belongs to an error space that is not known in this address space. Also errors raised by APIs that do not return enough error information may be converted to this error.\nINVALID_ARGUMENT | 3 | The client specified an invalid argument. Note that this differs from FAILED_PRECONDITION. INVALID_ARGUMENT indicates arguments that are problematic regardless of the state of the system (e.g., a malformed file name).\nDEADLINE_EXCEEDED | 4 | The deadline expired before the operation could complete. For operations that change the state of the system, this error may be returned even if the operation has completed successfully. For example, a successful response from a server could have been delayed long\nNOT_FOUND | 5 | Some requested entity (e.g., file or directory) was not found. Note to server developers: if a request is denied for an entire class of users, such as gradual feature rollout or undocumented allowlist, NOT_FOUND may be used. If a request is denied for some users within a class of users, such as user-based access control, PERMISSION_DENIED must be used.\nALREADY_EXISTS | 6 | The entity that a client attempted to create (e.g., file or directory) already exists.\nPERMISSION_DENIED | 7 | The caller does not have permission to execute the specified operation. PERMISSION_DENIED must not be used for rejections caused by exhausting some resource (use RESOURCE_EXHAUSTED instead for those errors). PERMISSION_DENIED must not be used if the caller can not be identified (use UNAUTHENTICATED instead for those errors). 
This error code does not imply the request is valid or the requested entity exists or satisfies other pre-conditions.\nRESOURCE_EXHAUSTED | 8 | Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space.\nFAILED_PRECONDITION | 9 | The operation was rejected because the system is not in a state required for the operation's execution. For example, the directory to be deleted is non-empty, an rmdir operation is applied to a non-directory, etc. Service implementors can use the following guidelines to decide between FAILED_PRECONDITION, ABORTED, and UNAVAILABLE: (a) Use UNAVAILABLE if the client can retry just the failing call. (b) Use ABORTED if the client should retry at a higher level (e.g., when a client-specified test-and-set fails, indicating the client should restart a read-modify-write sequence). (c) Use FAILED_PRECONDITION if the client should not retry until the system state has been explicitly fixed. E.g., if an \"rmdir\" fails because the directory is non-empty, FAILED_PRECONDITION should be returned since the client should not retry unless the files are deleted from the directory.\nABORTED | 10 | The operation was aborted, typically due to a concurrency issue such as a sequencer check failure or transaction abort. See the guidelines above for deciding between FAILED_PRECONDITION, ABORTED, and UNAVAILABLE.\nOUT_OF_RANGE | 11 | The operation was attempted past the valid range. E.g., seeking or reading past end-of-file. Unlike INVALID_ARGUMENT, this error indicates a problem that may be fixed if the system state changes. For example, a 32-bit file system will generate INVALID_ARGUMENT if asked to read at an offset that is not in the range [0,2^32-1], but it will generate OUT_OF_RANGE if asked to read from an offset past the current file size. There is a fair bit of overlap between FAILED_PRECONDITION and OUT_OF_RANGE. 
We recommend using OUT_OF_RANGE (the more specific error) when it applies so that callers who are iterating through a space can easily look for an OUT_OF_RANGE error to detect when they are done.\nUNIMPLEMENTED | 12 | The operation is not implemented or is not supported/enabled in this service.\nINTERNAL | 13 | Internal errors. This means that some invariants expected by the underlying system have been broken. This error code is reserved for serious errors.\nUNAVAILABLE | 14 | The service is currently unavailable. This is most likely a transient condition, which can be corrected by retrying with a backoff. Note that it is not always safe to retry non-idempotent operations.\nDATA_LOSS | 15 | Unrecoverable data loss or corruption.\nUNAUTHENTICATED | 16 | The request does not have valid authentication credentials for the operation.\n", + "mode": "markdown" + }, + "span": 6, + "timeFrom": null, + "timeShift": null, + "title": "gRPC status codes", + "transparent": false, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Envoy Proxy", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "accepted", + 
"legendLink": null + }, + { + "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "refused", + "legendLink": null + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Spans/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 
50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum 
by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + 
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + 
"format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached - Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend - Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + 
"aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, 
+ "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached - Compactor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, 
+ "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 99th", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} 50th", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{route}} Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend - Compactor", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "tempo" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(tempo_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ + + ], + "query": "label_values(tempo_build_info{cluster=~'$cluster'}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + 
}, + "timezone": "utc", + "title": "Tempo / Writes", + "uid": "", + "version": 0 +} \ No newline at end of file diff --git a/charts/meta-monitoring/src/rules/loki-rules.yaml b/charts/meta-monitoring/src/rules/loki-rules.yaml index b0db8ba..24d9f5d 100644 --- a/charts/meta-monitoring/src/rules/loki-rules.yaml +++ b/charts/meta-monitoring/src/rules/loki-rules.yaml @@ -1,53 +1,39 @@ groups: - - name: "loki_rules" + - name: loki_rules rules: - - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, job))" - record: "cluster_job:loki_request_duration_seconds:99quantile" - - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, job))" - record: "cluster_job:loki_request_duration_seconds:50quantile" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[5m])) - by (cluster, job)" - record: "cluster_job:loki_request_duration_seconds:avg" - - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job)" - record: "cluster_job:loki_request_duration_seconds_bucket:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job)" - record: "cluster_job:loki_request_duration_seconds_sum:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job)" - record: "cluster_job:loki_request_duration_seconds_count:sum_rate" - - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, job, route))" - record: "cluster_job_route:loki_request_duration_seconds:99quantile" - - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, job, route))" - record: "cluster_job_route:loki_request_duration_seconds:50quantile" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route) - / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, 
job, route)" - record: "cluster_job_route:loki_request_duration_seconds:avg" - - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, job, - route)" - record: "cluster_job_route:loki_request_duration_seconds_bucket:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, job, route)" - record: "cluster_job_route:loki_request_duration_seconds_sum:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, job, route)" - record: "cluster_job_route:loki_request_duration_seconds_count:sum_rate" - - expr: "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, namespace, job, route))" - record: "cluster_namespace_job_route:loki_request_duration_seconds:99quantile" - - expr: "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[5m])) - by (le, cluster, namespace, job, route))" - record: "cluster_namespace_job_route:loki_request_duration_seconds:50quantile" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace, - job, route) / sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, - namespace, job, route)" - record: "cluster_namespace_job_route:loki_request_duration_seconds:avg" - - expr: "sum(rate(loki_request_duration_seconds_bucket[5m])) by (le, cluster, namespace, - job, route)" - record: "cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_sum[5m])) by (cluster, namespace, - job, route)" - record: "cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate" - - expr: "sum(rate(loki_request_duration_seconds_count[5m])) by (cluster, namespace, - job, route)" - record: "cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate" \ No newline at end of file + - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: 
cluster_job:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:loki_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, 
sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate diff --git a/charts/meta-monitoring/src/rules/mimir-rules.yaml b/charts/meta-monitoring/src/rules/mimir-rules.yaml new file mode 100644 index 0000000..a1cd08e --- /dev/null +++ b/charts/meta-monitoring/src/rules/mimir-rules.yaml @@ -0,0 +1,461 @@ +groups: + - name: mimir_api_1 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: 
cluster_job:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds:sum_rate + - name: mimir_api_2 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds:sum_rate + - name: mimir_api_3 + rules: + - expr: histogram_quantile(0.99, 
sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate + - name: mimir_querier_api + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job) + record: 
cluster_job:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: 
histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate + - name: mimir_storage + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:50quantile + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds:avg + - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) + record: 
cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate + - name: mimir_queries + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:50quantile + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries:avg + - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job) + 
record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate + - name: mimir_ingester_queries + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:50quantile + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series:avg + - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:50quantile + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: 
cluster_job:cortex_ingester_queried_samples:avg + - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_exemplars:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_exemplars:50quantile + - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars:avg + - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate + - name: mimir_received_samples + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) + record: cluster_namespace_job:cortex_distributor_received_samples:rate5m + - name: mimir_exemplars_in + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) + record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m + - name: 
mimir_received_exemplars + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) + record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m + - name: mimir_exemplars_ingested + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) + record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m + - name: mimir_exemplars_appended + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) + record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m + - name: mimir_scaling_rules + rules: + - expr: | + # Convenience rule to get the number of replicas for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + sum by (cluster, namespace, deployment) ( + label_replace( + kube_deployment_spec_replicas, + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" 
+ ) + ) + or + sum by (cluster, namespace, deployment) ( + label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") + ) + record: cluster_namespace_deployment:actual_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + / 240000 + ) + labels: + deployment: distributor + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 240000 + ) + labels: + deployment: distributor + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + * 3 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by(cluster, namespace) ( + cortex_ingester_memory_series + )[24h:] + ) + / 1500000 + ) + labels: + deployment: ingester + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) + * 3 * 0.59999999999999998 / 1500000 + ) + labels: + deployment: ingester + reason: active_series_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + (sum by (cluster, namespace) ( + 
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/.*ingester.*"} + ) / 4) + / + avg by (cluster, namespace) ( + memcached_limit_bytes{job=~".+/.*memcached"} + ) + ) + labels: + deployment: memcached + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate + - expr: | + # Convenience rule to get the CPU request for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_cpu_cores was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_cpu_cores, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" 
+ ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="cpu"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + - expr: | + # Jobs should be sized to their CPU usage. + # We do this by comparing 99th percentile usage over the last 24hrs to + # their current provisioned #replicas and resource requests. + ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + ) + labels: + reason: cpu_usage + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + # Convenience rule to get the Memory utilization for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + container_memory_usage_bytes{image!=""}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_memory_usage_bytes:sum + - expr: | + # Convenience rule to get the Memory request for both a deployment and a statefulset. 
+ # Multi-zone deployments are grouped together removing the "zone-X" suffix. + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_memory_bytes was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_memory_bytes, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="memory"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + # Jobs should be sized to their Memory usage. + # We do this by comparing 99th percentile usage over the last 24hrs to + # their current provisioned #replicas and resource requests. 
+ ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + ) + labels: + reason: memory_usage + record: cluster_namespace_deployment_reason:required_replicas:count + - name: mimir_alertmanager_rules + rules: + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_alerts) + record: cluster_job_pod:cortex_alertmanager_alerts:sum + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_silences) + record: cluster_job_pod:cortex_alertmanager_silences:sum + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) + record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) + record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m + - name: mimir_ingester_rules + rules: + - expr: | + sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) + record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m diff --git a/charts/meta-monitoring/src/rules/tempo-rules.yaml b/charts/meta-monitoring/src/rules/tempo-rules.yaml new file mode 100644 index 0000000..2597498 --- /dev/null +++ b/charts/meta-monitoring/src/rules/tempo-rules.yaml @@ -0,0 +1,15 @@ +"groups": +- "name": "tempo_rules" + "rules": + - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds:99quantile" + - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds:50quantile" + - "expr": "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds:avg" + - "expr": "sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate" + - "expr": "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate" + - "expr": "sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" + "record": "cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate" diff --git a/charts/meta-monitoring/templates/agent/config.yaml b/charts/meta-monitoring/templates/agent/config.yaml deleted file mode 100644 
index 36c111d..0000000 --- a/charts/meta-monitoring/templates/agent/config.yaml +++ /dev/null @@ -1,395 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: agent-configmap - namespace: {{ .Release.Namespace }} -data: - config.river: | - discovery.kubernetes "pods" { - role = "pod" - namespaces { - own_namespace = true - names = [ {{ include "agent.namespaces" . }} ] - } - } - - discovery.relabel "rename_meta_labels" { - targets = discovery.kubernetes.pods.targets - - rule { - source_labels = ["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - regex = "(.*)/(.*)/(.*)" - replacement = "${1}/${2}-${3}" - target_label = "job" - } - rule { - target_label = "cluster" - replacement = "{{- .Values.clusterLabelValue -}}" - } - } - - {{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }} - // Logs - - {{- if .Values.cloud.logs.enabled }} - remote.kubernetes.secret "logs_credentials" { - namespace = "{{- $.Release.Namespace -}}" - name = "{{- .Values.cloud.logs.secret -}}" - } - {{- end }} - - loki.source.kubernetes "pods" { - clustering { - enabled = true - } - targets = discovery.relabel.rename_meta_labels.output - forward_to = [ {{ include "agent.loki_process_targets" . }} ] - } - - {{- if or (not (empty .Values.logs.retain)) (not (empty .Values.logs.piiRegexes)) }} - loki.process "filter" { - forward_to = [ {{ include "agent.loki_write_targets" . }} ] - - {{- if or (not (empty .Values.logs.retain)) (not (empty .Values.logs.extraLogs)) }} - stage.match { - selector = "{cluster=\"{{- .Values.clusterLabelValue -}}\", namespace=~\"{{- join "|" .Values.namespacesToMonitor -}}|{{- $.Release.Namespace -}}\", pod=~\"loki.*\"} !~ \"{{ include "agent.all_logs" . 
}}\"" - action = "drop" - } - {{- end }} - - {{- if not (empty .Values.logs.piiRegexes) }} - {{- range .Values.logs.piiRegexes }} - stage.replace { - expression = "{{ .expression }}" - source = "{{ .source }}" - replace = "{{ .replace }}" - } - {{- end }} - {{- end }} - } - {{- end }} - {{- end }} - - {{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }} - // Metrics - - {{- if .Values.cloud.metrics.enabled }} - remote.kubernetes.secret "metrics_credentials" { - namespace = "{{- $.Release.Namespace -}}" - name = "{{- .Values.cloud.metrics.secret -}}" - } - {{- end }} - - discovery.kubernetes "metric_pods" { - role = "pod" - namespaces { - own_namespace = true - names = [ {{ include "agent.all_namespaces" . }} ] - } - } - - discovery.relabel "only_http_metrics" { - targets = discovery.kubernetes.metric_pods.targets - - rule { - source_labels = ["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - regex = "(.*)/(.*)/(.*)" - replacement = "${1}/${2}-${3}" - target_label = "job" - } - rule { - target_label = "cluster" - replacement = "{{- .Values.clusterLabelValue -}}" - } - rule { - source_labels = ["__meta_kubernetes_pod_container_port_number"] - action = "drop" - regex = "9095" - } - } - - prometheus.scrape "pods" { - clustering { - enabled = true - } - targets = discovery.relabel.only_http_metrics.output - forward_to = [ prometheus.relabel.filter.receiver ] - } - - prometheus.relabel "filter" { - rule { - target_label = "cluster" - replacement = "{{- .Values.clusterLabelValue -}}" - } - - rule { - source_labels = ["__name__"] - regex = "({{ include "agent.all_metrics" . 
}})" - action = "keep" - } - - rule { - source_labels = ["namespace"] - regex = "{{ include "agent.all_namespaces_bar" . }}" - - action = "keep" - } - - forward_to = [ {{ include "agent.prometheus_write_targets" . }} ] - } - {{- if .Values.kubeStateMetrics.enabled }} - - prometheus.scrape "kubeStateMetrics" { - clustering { - enabled = true - } - targets = [ { "__address__" = "{{ .Values.kubeStateMetrics.endpoint }}" } ] - forward_to = [ prometheus.relabel.filter.receiver ] - } - {{- end }} - - // cAdvisor and Kubelet metrics - // Based on https://github.com/Chewie/loutretelecom-manifests/blob/main/manifests/addons/monitoring/config.river - discovery.kubernetes "all_nodes" { - role = "node" - namespaces { - own_namespace = true - names = [ {{ include "agent.namespaces" . }} ] - } - } - - discovery.relabel "all_nodes" { - targets = discovery.kubernetes.all_nodes.targets - rule { - source_labels = ["__meta_kubernetes_node_name"] - target_label = "node" - } - rule { - source_labels = ["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - regex = "(.*)/(.*)/(.*)" - replacement = "${1}/${2}-${3}" - target_label = "job" - } - rule { - target_label = "cluster" - replacement = "{{- .Values.clusterLabelValue -}}" - } - } - - prometheus.scrape "cadvisor" { - clustering { - enabled = true - } - targets = discovery.relabel.all_nodes.output - forward_to = [ prometheus.relabel.filter.receiver ] - - metrics_path = "/metrics/cadvisor" - scheme = "https" - - bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" - tls_config { - ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - } - } - - prometheus.scrape "kubelet" { - clustering { - enabled = true - } - targets = 
discovery.relabel.all_nodes.output - forward_to = [ prometheus.relabel.filter.receiver ] - - metrics_path = "/metrics" - scheme = "https" - - bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" - tls_config { - ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - } - } - - prometheus.exporter.unix "promexporter" {} - - prometheus.scrape "node_exporter" { - clustering { - enabled = true - } - targets = prometheus.exporter.unix.promexporter.targets - forward_to = [prometheus.relabel.node_exporter.receiver] - - job_name = "node-exporter" - } - - prometheus.relabel "node_exporter" { - forward_to = [ prometheus.relabel.filter.receiver ] - - rule { - replacement = env("HOSTNAME") - target_label = "nodename" - } - rule { - replacement = "node-exporter" - target_label = "job" - } - rule { - source_labels = ["__meta_kubernetes_node_name"] - target_label = "node" - } - rule { - source_labels = ["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - regex = "(.*)/(.*)/(.*)" - replacement = "${1}/${2}-${3}" - target_label = "job" - } - rule { - target_label = "cluster" - replacement = "{{- .Values.clusterLabelValue -}}" - } - } - {{- end }} - - {{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }} - // Traces - - {{- if .Values.cloud.traces.enabled }} - remote.kubernetes.secret "traces_credentials" { - namespace = "{{- $.Release.Namespace -}}" - name = "{{- .Values.cloud.traces.secret -}}" - } - {{- end }} - - // Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river - otelcol.receiver.otlp "otlp_receiver" { - // We don't technically need this, but it shows how to change listen address and incoming port. 
- // In this case, the Agent is listening on all available bindable addresses on port 4317 (which is the - // default OTLP gRPC port) for the OTLP protocol. - grpc {} - - // We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor - // named 'default'. - output { - traces = [otelcol.processor.batch.default.input] - } - } - - otelcol.receiver.jaeger "jaeger" { - protocols { - thrift_http {} - } - - output { - traces = [otelcol.processor.batch.default.input] - } - } - - // The OpenTelemetry batch processor collects trace spans until a batch size or timeout is met, before sending those - // spans onto another target. This processor is labeled 'default'. - otelcol.processor.batch "default" { - // Wait until we've received 16K of data. - send_batch_size = 16384 - // Or until 2 seconds have elapsed. - timeout = "2s" - // When the Agent has enough batched data, send it to the OpenTelemetry exporter named 'local'. - output { - traces = [ {{ include "agent.tempo_write_targets" . 
}} ] - } - } - {{- end }} - - {{- if .Values.local.logs.enabled }} - loki.write "local" { - endpoint { - url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push" - } - } - {{- end }} - - {{- if .Values.local.metrics.enabled }} - prometheus.remote_write "local" { - endpoint { - url = "http://{{- .Release.Name -}}-mimir-nginx.{{- .Release.Namespace -}}.svc:80/api/v1/push" - } - } - {{- end }} - - {{- if .Values.local.traces.enabled }} - otelcol.exporter.otlphttp "local" { - client { - endpoint = "http://{{- .Release.Name -}}-tempo-distributor.{{- .Release.Namespace -}}.svc:4318" - } - } - {{- end }} - - {{- if .Values.cloud.logs.enabled }} - loki.write "cloud" { - endpoint { - url = nonsensitive(remote.kubernetes.secret.logs_credentials.data["endpoint"]) - basic_auth { - username = nonsensitive(remote.kubernetes.secret.logs_credentials.data["username"]) - password = remote.kubernetes.secret.logs_credentials.data["password"] - } - } - } - {{- end }} - - {{- if .Values.cloud.metrics.enabled }} - prometheus.remote_write "cloud" { - endpoint { - url = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["endpoint"]) - basic_auth { - username = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["username"]) - password = remote.kubernetes.secret.metrics_credentials.data["password"] - } - } - } - {{- end }} - - {{- if .Values.cloud.traces.enabled }} - otelcol.exporter.otlphttp "cloud" { - client { - endpoint = nonsensitive(remote.kubernetes.secret.traces_credentials.data["endpoint"]) - auth = otelcol.auth.basic.creds.handler - } - } - - otelcol.auth.basic "creds" { - username = nonsensitive(remote.kubernetes.secret.traces_credentials.data["username"]) - password = remote.kubernetes.secret.traces_credentials.data["password"] - } - {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl b/charts/meta-monitoring/templates/alloy/_helpers-agent.tpl similarity index 63% 
rename from charts/meta-monitoring/templates/agent/_helpers-agent.tpl rename to charts/meta-monitoring/templates/alloy/_helpers-agent.tpl index 48d84ab..4b5df3e 100644 --- a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl +++ b/charts/meta-monitoring/templates/alloy/_helpers-agent.tpl @@ -85,4 +85,52 @@ {{- $list = append $list . }} {{- end }} {{- join "|" $list }} +{{- end }} + +{{- define "alloy.logs.namespaces" -}} +{{- $list := list }} +{{- range index .Values "alloy-logs" "namespacesToMonitor" }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- join ", " $list }} +{{- end }} + +{{- define "alloy.events.namespaces" -}} +{{- $list := list }} +{{- range index .Values "alloy-events" "namespacesToMonitor" }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- join ", " $list }} +{{- end }} + +{{- define "alloy.cadvisor.namespaces" -}} +{{- $list := list }} +{{- range index .Values "alloy-metrics" "cadvisor" "namespacesToMonitor" }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- join ", " $list }} +{{- end }} + +{{- define "alloy.ksm.namespaces" -}} +{{- $list := list }} +{{- range index .Values "alloy-metrics" "kube_state_metrics" "namespacesToMonitor" }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- join ", " $list }} +{{- end }} + +{{- define "alloy.node_exporter.labelselectors" -}} +{{- $list := list }} +{{- range index .Values "alloy-metrics" "node_exporter" "labelSelectors" }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- join ", " $list }} +{{- end }} + +{{- define "alloy.ksm.labelselectors" -}} +{{- $list := list }} +{{- range index .Values "alloy-metrics" "kube_state_metrics" "labelSelectors" }} +{{- $list = append $list (printf "\"%s\"" .) 
}} +{{- end }} +{{- join ", " $list }} {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/alloy/events-config.yaml b/charts/meta-monitoring/templates/alloy/events-config.yaml new file mode 100644 index 0000000..9081e35 --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/events-config.yaml @@ -0,0 +1,59 @@ +{{- if index .Values "alloy-events" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-events-config + namespace: {{ .Release.Namespace }} +data: + config.alloy: | + logging { + level = "info" + format = "logfmt" + } + + loki.source.kubernetes_events "cluster_events" { + job_name = "integrations/kubernetes/eventhandler" + log_format = "logfmt" + namespaces = [{{- include "alloy.events.namespaces" . }}] + forward_to = [ + loki.process.logs_service.receiver, + ] + } + + loki.process "logs_service" { + stage.static_labels { + values = { + cluster = "{{- .Values.clusterLabelValue }}", + environment = "{{- .Values.environmentLabelValue -}}", + } + } + forward_to = [{{ include "agent.loki_write_targets" . 
}},] + } + + {{- if .Values.cloud.logs.enabled }} + remote.kubernetes.secret "logs_credentials" { + namespace = "{{- $.Release.Namespace -}}" + name = "{{- .Values.cloud.logs.secret -}}" + } + {{- end }} + + {{- if .Values.local.logs.enabled }} + loki.write "local" { + endpoint { + url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push" + } + } + {{- end }} + + {{- if .Values.cloud.logs.enabled }} + loki.write "cloud" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_credentials.data["endpoint"]) + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_credentials.data["username"]) + password = remote.kubernetes.secret.logs_credentials.data["password"] + } + } + } + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/alloy/logs-config.yaml b/charts/meta-monitoring/templates/alloy/logs-config.yaml new file mode 100644 index 0000000..acab6ce --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/logs-config.yaml @@ -0,0 +1,58 @@ +{{- if index .Values "alloy-logs" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-logs-config + namespace: {{ .Release.Namespace }} +data: + config.alloy: | + remote.kubernetes.configmap "logs" { + namespace = "{{ .Release.Namespace }}" + name = "logs" + } + + import.string "logs" { + content = remote.kubernetes.configmap.logs.data.logs + } + + logs.pod_logs "default" { + namespaces = [{{ include "alloy.logs.namespaces" . }}] + forward_to = [{{ include "agent.loki_write_targets" . 
}},] + } + + {{- if .Values.cloud.logs.enabled }} + remote.kubernetes.secret "logs_credentials" { + namespace = "{{- $.Release.Namespace -}}" + name = "{{- .Values.cloud.logs.secret -}}" + } + {{- end }} + + {{- if .Values.local.logs.enabled }} + loki.write "local" { + endpoint { + url = "http://loki-write.{{- .Release.Namespace -}}.svc.cluster.local:3100/loki/api/v1/push" + } + + external_labels = { + cluster = "{{- .Values.clusterLabelValue -}}", + } + } + {{- end }} + + {{- if .Values.cloud.logs.enabled }} + loki.write "cloud" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_credentials.data["endpoint"]) + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_credentials.data["username"]) + password = remote.kubernetes.secret.logs_credentials.data["password"] + } + } + + external_labels = { + cluster = "{{- .Values.clusterLabelValue -}}", + environment = "{{- .Values.environmentLabelValue -}}", + } + } + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/alloy/metrics-config.yaml b/charts/meta-monitoring/templates/alloy/metrics-config.yaml new file mode 100644 index 0000000..382819b --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/metrics-config.yaml @@ -0,0 +1,251 @@ +{{- if index .Values "alloy-metrics" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-metrics-config + namespace: {{ .Release.Namespace }} +data: + config.alloy: | + {{- if index .Values "alloy-metrics" "mimir" "enabled" }} + + remote.kubernetes.configmap "mimir" { + namespace = "{{ .Release.Namespace }}" + name = "mimir-metrics" + } + + import.string "mimir" { + content = remote.kubernetes.configmap.mimir.data.mimir + } + + mimir.kubernetes "targets" { + namespaces = ["{{- index .Values "alloy-metrics" "mimir" "namespace" -}}"] + } + + mimir.scrape "metrics" { + targets = mimir.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . 
}}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "loki" "enabled" }} + remote.kubernetes.configmap "loki" { + namespace = "{{ .Release.Namespace }}" + name = "loki-metrics" + } + + import.string "loki" { + content = remote.kubernetes.configmap.loki.data.loki + } + + loki.kubernetes "targets" { + namespaces = ["{{- index .Values "alloy-metrics" "loki" "namespace" -}}"] + label_selectors = ["app.kubernetes.io/name=enterprise-logs"] + } + + loki.scrape "metrics" { + targets = loki.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . }}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "tempo" "enabled" }} + remote.kubernetes.configmap "tempo" { + namespace = "{{ .Release.Namespace }}" + name = "tempo-metrics" + } + + import.string "tempo" { + content = remote.kubernetes.configmap.tempo.data.tempo + } + + tempo.kubernetes "targets" { + namespaces = ["{{- index .Values "alloy-metrics" "tempo" "namespace" -}}"] + } + + tempo.scrape "metrics" { + targets = tempo.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . }}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "grafana" "enabled" }} + remote.kubernetes.configmap "grafana" { + namespace = "{{ .Release.Namespace }}" + name = "grafana-metrics" + } + + import.string "grafana" { + content = remote.kubernetes.configmap.grafana.data.grafana + } + + grafana.kubernetes "targets" { + namespaces = ["{{- index .Values "alloy-metrics" "grafana" "namespace" -}}"] + } + + grafana.scrape "metrics" { + targets = grafana.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . 
}}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "alloy" "enabled" }} + remote.kubernetes.configmap "alloy" { + namespace = "{{ .Release.Namespace }}" + name = "alloy-metrics" + } + + import.string "alloy" { + content = remote.kubernetes.configmap.alloy.data.alloy + } + + alloy.kubernetes "targets" { + namespaces = ["{{- index .Values "alloy-metrics" "alloy" "namespace" -}}"] + } + + alloy.scrape "metrics" { + targets = alloy.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . }}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "meta-monitoring" "enabled" }} + remote.kubernetes.configmap "meta_monitoring" { + namespace = "{{ .Release.Namespace }}" + name = "alloy-metrics" + } + + import.string "meta_monitoring" { + content = remote.kubernetes.configmap.meta_monitoring.data.alloy + } + + meta_monitoring.kubernetes "targets" { + namespaces = ["{{ .Release.Namespace -}}"] + } + + meta_monitoring.scrape "metrics" { + targets = meta_monitoring.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . }}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "node_exporter" "enabled" }} + remote.kubernetes.configmap "node_exporter" { + namespace = "{{ .Release.Namespace }}" + name = "node-exporter-metrics" + } + + import.string "node_exporter" { + content = remote.kubernetes.configmap.node_exporter.data.node_exporter + } + + node_exporter.kubernetes "targets" { + label_selectors = [{{ include "alloy.node_exporter.labelselectors" . }}] + } + + node_exporter.scrape "metrics" { + targets = node_exporter.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . 
}}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "kube_state_metrics" "enabled" }} + remote.kubernetes.configmap "kube_state_metrics" { + namespace = "{{ .Release.Namespace }}" + name = "kube-state-metrics-metrics" + } + + import.string "kube_state_metrics" { + content = remote.kubernetes.configmap.kube_state_metrics.data.kube_state_metrics + } + + kube_state_metrics.kubernetes "targets" { + namespaces = [{{- include "alloy.ksm.namespaces" . }}] + label_selectors = [{{ include "alloy.ksm.labelselectors" . }}] + } + + kube_state_metrics.scrape "metrics" { + targets = kube_state_metrics.kubernetes.targets.output + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . }}, + ] + } + {{- end }} + + {{- if index .Values "alloy-metrics" "cadvisor" "enabled" }} + + remote.kubernetes.configmap "cadvisor" { + namespace = "{{ .Release.Namespace }}" + name = "cadvisor-metrics" + } + + import.string "cadvisor" { + content = remote.kubernetes.configmap.cadvisor.data.cadvisor + } + + cadvisor.cadvisor "targets" { + namespaces = [{{- include "alloy.cadvisor.namespaces" . }}] + clustering = true + forward_to = [ + {{ include "agent.prometheus_write_targets" . 
}}, + ] + } + {{- end }} + + {{- if .Values.cloud.metrics.enabled }} + // Credentials are only read when cloud metrics are enabled, so a missing secret cannot break local-only installs. + remote.kubernetes.secret "metrics_credentials" { + namespace = "{{- $.Release.Namespace -}}" + name = "{{- .Values.cloud.metrics.secret -}}" + } + prometheus.remote_write "cloud" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["endpoint"]) + basic_auth { + username = nonsensitive(remote.kubernetes.secret.metrics_credentials.data["username"]) + password = remote.kubernetes.secret.metrics_credentials.data["password"] + } + } + + external_labels = { + cluster = "{{- .Values.clusterLabelValue -}}", + } + } + {{- end }} + + + {{- if .Values.local.metrics.enabled }} + prometheus.remote_write "local" { + endpoint { + url = "http://{{- .Release.Name -}}-mimir-nginx.{{- .Release.Namespace -}}.svc:80/api/v1/push" + } + + external_labels = { + cluster = "{{- .Values.clusterLabelValue -}}", + environment = "{{- .Values.environmentLabelValue -}}", + } + } + {{- end }} + +{{- end }} diff --git a/charts/meta-monitoring/templates/alloy/modules/logs/logs.yaml b/charts/meta-monitoring/templates/alloy/modules/logs/logs.yaml new file mode 100644 index 0000000..35e0d07 --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/modules/logs/logs.yaml @@ -0,0 +1,142 @@ +{{- if index .Values "alloy-logs" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: logs + namespace: {{ .Release.Namespace }} +data: + logs: | + declare "pod_logs" { + argument "namespaces" { + comment = "Namespaces to search" + } + + argument "forward_to" { + comment = "Remote Write Component" + } + + discovery.kubernetes "targets" { + role = "pod" + + namespaces { + names = argument.namespaces.value + } + } + + discovery.relabel "targets" { + targets = discovery.kubernetes.targets.targets + + rule { + action = "replace" + separator = "/" + source_labels = [ + "__meta_kubernetes_pod_uid", + "__meta_kubernetes_pod_container_name", + ] + replacement = "/var/log/pods/*$1/*.log" + target_label = 
"__path__" + } + + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "__host__" + } + + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_label_(.+)" + } + + // make all annotations on the pod available to the pipeline as labels, + // they are omitted before write via labelallow unless explicitly set + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_annotation_(.+)" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + rule { + action = "replace" + source_labels = [ + "workload", + "__meta_kubernetes_namespace", + ] + regex = ".+\\/(.+);(.+)" + replacement = "$2/$1" + target_label = "job" + } + } + + local.file_match "pods" { + path_targets = discovery.relabel.targets.output + } + + loki.source.file "targets" { + targets = local.file_match.pods.targets + forward_to = [loki.process.label_keep.receiver] + } + + 
loki.process "label_keep" { + forward_to = argument.forward_to.value + + stage.label_keep { + values = ["job", "component", "app", "workload", "pod", "namespace", "container"] + } + } + } +{{- end }} diff --git a/charts/meta-monitoring/templates/alloy/modules/metrics/alloy.yaml b/charts/meta-monitoring/templates/alloy/modules/metrics/alloy.yaml new file mode 100644 index 0000000..1fa9fd0 --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/modules/metrics/alloy.yaml @@ -0,0 +1,316 @@ +{{- if or (index .Values "alloy-metrics" "alloy" "enabled") (index .Values "alloy-metrics" "meta-monitoring" "enabled") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-metrics + namespace: {{ .Release.Namespace }} +data: + alloy: | + /* + Module: job-alloy + Description: Scrapes grafana alloy + + Note: Every argument except for "forward_to" is optional, and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using the coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax + does not override the value passed in, whereas coalesce() will return the first non-null value. 
+ */ + declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "port_name" { + comment = "The name of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // alloy service discovery for all of the pods + discovery.kubernetes "alloy" { + role = "pod" + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // alloy relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.alloy.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false" + action = "keep" + } + + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "__host__" + } + + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_label_(.+)" + } + + // make all annotations on the pod available to the pipeline as labels, + // they are omitted before write via labelallow unless explicitly set + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_annotation_(.+)" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = 
"workload" + } + + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + rule { + action = "replace" + source_labels = [ + "workload", + "__meta_kubernetes_namespace", + ] + regex = ".+\\/(.+);(.+)" + replacement = "$2/$1" + target_label = "job" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } + } + + declare "local" { + + argument "port_name" { + comment = "The port to use (default: 12345)" + optional = true + } + + // static localhost target; the address is "localhost:<port>" built from the port_name argument + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost:" + format("%s", coalesce(argument.port_name.value, "12345")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } + } + + declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all grafana alloy metrics (default: integrations/alloy)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + 
argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // grafana alloy scrape job + prometheus.scrape "alloy" { + job_name = coalesce(argument.job_label.value, "integrations/alloy") + forward_to = [prometheus.relabel.alloy.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // grafana-alloy metric relabelings (post-scrape) + prometheus.relabel "alloy" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, 
"(up|log_.+|(alloy_(build_info|tcp_connections|wal_(samples_appended_total|storage_active_series))|go_(gc_duration_seconds_count|goroutines|memstats_heap_inuse_bytes)|process_(cpu_seconds_total|start_time_seconds)|prometheus_(remote_storage_(enqueue_retries_total|highest_timestamp_in_seconds|queue_highest_sent_timestamp_seconds|samples_(dropped_total|failed_total|pending|retried_total|total)|sent_batch_duration_seconds_(bucket|count|sum)|shard_(capacity|s(_desired|_max|_min))|succeeded_samples_total)|sd_discovered_targets|target_(interval_length_seconds_(count|sum)|scrapes_(exceeded_sample_limit_total|sample_(duplicate_timestamp_total|out_of_bounds_total|out_of_order_total)))|target_sync_length_seconds_sum|wal_watcher_current_segment)|traces_(exporter_send_failed_spans|exporter_sent_spans|loadbalancer_(backend_outcome|num_backends)|receiver_(accepted_spans|refused_spans))))") + action = "keep" + } + + // remove the component_id label from any metric that starts with log_bytes or log_lines, these are custom metrics that are generated + // as part of the log annotation modules in this repo + rule { + action = "replace" + source_labels = ["__name__"] + regex = "^log_(bytes|lines).+" + replacement = "" + target_label = "component_id" + } + + // set the namespace label to that of the exported_namespace + rule { + action = "replace" + source_labels = ["__name__", "exported_namespace"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "namespace" + } + + // set the pod label to that of the exported_pod + rule { + action = "replace" + source_labels = ["__name__", "exported_pod"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "pod" + } + + // set the container label to that of the exported_container + rule { + action = "replace" + source_labels = ["__name__", "exported_container"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "container" 
+ } + + // set the job label to that of the exported_job + rule { + action = "replace" + source_labels = ["__name__", "exported_job"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "job" + } + + // set the instance label to that of the exported_instance + rule { + action = "replace" + source_labels = ["__name__", "exported_instance"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "instance" + } + + rule { + action = "labeldrop" + regex = "exported_(namespace|pod|container|job|instance)" + } + + rule { + action = "labelkeep" + regex = "__name__|job|component|app|workload|namespace|pod|container|route|status|status_code|le|operation|reason" + } + } + } +{{- end }} diff --git a/charts/meta-monitoring/templates/alloy/modules/metrics/cadvisor.yaml b/charts/meta-monitoring/templates/alloy/modules/metrics/cadvisor.yaml new file mode 100644 index 0000000..9619ad1 --- /dev/null +++ b/charts/meta-monitoring/templates/alloy/modules/metrics/cadvisor.yaml @@ -0,0 +1,234 @@ +{{- if index .Values "alloy-metrics" "cadvisor" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: cadvisor-metrics + namespace: {{ .Release.Namespace }} +data: + cadvisor: | + declare "cadvisor" { + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + argument "job_label" { + comment = "The job label to add for all cadvisor metric (default: integrations/kubernetes/cadvisor)" + 
optional = true + } + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + argument "namespaces" { + optional = true + } + + export "output" { + value = discovery.relabel.cadvisor.output + } + + // cadvisor service discovery for all of the nodes + discovery.kubernetes "cadvisor" { + role = "node" + + selectors { + role = "node" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, []), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // cadvisor relabelings (pre-scrape) + discovery.relabel "cadvisor" { + targets = discovery.kubernetes.cadvisor.targets + + // set the address to use the kubernetes service dns name + rule { + target_label = "__address__" + replacement = "kubernetes.default.svc.cluster.local:443" + } + + // set the metrics path to use the proxy path to the nodes cadvisor metrics endpoint + rule { + source_labels = ["__meta_kubernetes_node_name"] + regex = "(.+)" + replacement = "/api/v1/nodes/${1}/proxy/metrics/cadvisor" + target_label = "__metrics_path__" + } + + // set the node label + rule { 
+ source_labels = ["__meta_kubernetes_node_name"] + target_label = "node" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_node_label_app_kubernetes_io_name", + "__meta_kubernetes_node_label_k8s_app", + "__meta_kubernetes_node_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + // cadvisor scrape job + prometheus.scrape "cadvisor" { + job_name = coalesce(argument.job_label.value, "integrations/kubernetes/cadvisor") + forward_to = [prometheus.relabel.cadvisor.receiver] + targets = discovery.relabel.cadvisor.output + scheme = "https" + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + tls_config { + ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify = false + server_name = "kubernetes" + } + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // cadvisor metric relabelings (post-scrape) + prometheus.relabel "cadvisor" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + // the default network alternation is (bytes|packets(_dropped)?)_total so that the container_network_(receive|transmit)_packets_dropped_total series are kept as well + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, 
"(up|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|fs_(reads|writes)(_bytes)?_total|memory_(cache|rss|swap|working_set_bytes)|network_(receive|transmit)_(bytes|packets(_dropped)?)_total)|machine_memory_bytes)") + action = "keep" + } + + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + + // Normalizing unimportant labels (not deleting to continue satisfying