Skip to content

Commit

Permalink
enhancement: merge resource dashboards for ssd into resources-overvie…
Browse files Browse the repository at this point in the history
…w dashboard
  • Loading branch information
QuentinBisson committed Jul 10, 2024
1 parent 6ea83b4 commit af73de5
Show file tree
Hide file tree
Showing 10 changed files with 1,426 additions and 1,343 deletions.

This file was deleted.

1,252 changes: 1,252 additions & 0 deletions production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json

Large diffs are not rendered by default.

This file was deleted.

64 changes: 30 additions & 34 deletions production/loki-mixin-compiled/dashboards/loki-reads-resources.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"datasource": "$datasource",
Expand All @@ -314,7 +314,6 @@
},
"overrides": [ ]
},
"gridPos": { },
"id": 4,
"links": [ ],
"options": {
Expand All @@ -326,6 +325,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})",
Expand Down Expand Up @@ -403,7 +403,6 @@
}
]
},
"gridPos": { },
"id": 5,
"links": [ ],
"options": {
Expand All @@ -415,6 +414,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))",
Expand Down Expand Up @@ -504,7 +504,6 @@
}
]
},
"gridPos": { },
"id": 6,
"links": [ ],
"options": {
Expand All @@ -516,6 +515,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})",
Expand Down Expand Up @@ -566,7 +566,6 @@
},
"overrides": [ ]
},
"gridPos": { },
"id": 7,
"links": [ ],
"options": {
Expand All @@ -578,6 +577,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})",
Expand Down Expand Up @@ -616,7 +616,6 @@
},
"overrides": [ ]
},
"gridPos": { },
"id": 8,
"links": [ ],
"options": {
Expand All @@ -628,6 +627,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
Expand Down Expand Up @@ -663,7 +663,6 @@
},
"overrides": [ ]
},
"gridPos": { },
"id": 9,
"links": [ ],
"options": {
Expand All @@ -675,6 +674,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
Expand Down Expand Up @@ -710,7 +710,6 @@
},
"overrides": [ ]
},
"gridPos": { },
"id": 10,
"links": [ ],
"options": {
Expand All @@ -722,6 +721,7 @@
"sort": "none"
}
},
"span": 1,
"targets": [
{
"expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})",
Expand All @@ -739,8 +739,7 @@
"repeatRowId": null,
"showTitle": true,
"title": "Ingester",
"titleSize": "h6",
"type": "row"
"titleSize": "h6"
}
],
"schemaVersion": 14,
Expand Down
3 changes: 2 additions & 1 deletion production/loki-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
(import 'dashboards/loki-chunks.libsonnet') +
(import 'dashboards/loki-logs.libsonnet') +
(import 'dashboards/loki-operational.libsonnet') +
(import 'dashboards/loki-resources-overview.libsonnet') +
(import 'dashboards/loki-reads.libsonnet') +
(import 'dashboards/loki-reads-resources.libsonnet') +
(import 'dashboards/loki-writes.libsonnet') +
(import 'dashboards/loki-writes-resources.libsonnet') +
(import 'dashboards/loki-reads-resources.libsonnet') +
(import 'dashboards/loki-deletion.libsonnet') +
(import 'dashboards/loki-canary-dashboard.libsonnet') +
(import 'dashboards/recording-rules.libsonnet') +
Expand Down
62 changes: 19 additions & 43 deletions production/loki-mixin/dashboards/loki-reads-resources.libsonnet
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
local grafana = import 'grafonnet/grafana.libsonnet';
local utils = import 'mixin-utils/utils.libsonnet';

(import 'dashboard-utils.libsonnet') {
local index_gateway_pod_matcher = if $._config.meta_monitoring.enabled
then 'container=~"loki|index-gateway", pod=~"(index-gateway.*|%s-read.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
else if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="index-gateway"',
then 'container=~"loki|index-gateway", pod=~"(index-gateway.*|loki-single-binary)"'
else 'container="index-gateway"',
local index_gateway_job_matcher = if $._config.meta_monitoring.enabled
then '(index-gateway.*|%s-read.*|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
else if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'index-gateway',
then '(index-gateway.*|loki-single-binary)'
else 'index-gateway',

local ingester_pod_matcher = if $._config.meta_monitoring.enabled
then 'container=~"loki|ingester", pod=~"(ingester.*|%s-write.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
else if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"',
then 'container=~"loki|ingester", pod=~"(ingester.*|loki-single-binary)"'
else 'container="ingester"',
local ingester_job_matcher = if $._config.meta_monitoring.enabled
then '(ingester.+|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.+',
then '(ingester.+|loki-single-binary)'
else 'ingester.+',

grafanaDashboards+::
{
grafanaDashboards+:: if $._config.ssd.enabled then {} else {
'loki-reads-resources.json':
($.dashboard('Loki / Reads Resources', uid='reads-resources'))
.addCluster()
Expand All @@ -36,8 +32,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw(-internal)?'),
)
)
.addRowIf(
!$._config.ssd.enabled,
.addRow(
$.row('Query Frontend')
.addPanel(
$.containerCPUUsagePanel('CPU', 'query-frontend'),
Expand All @@ -49,8 +44,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.goHeapInUsePanel('Memory (go heap inuse)', 'query-frontend'),
)
)
.addRowIf(
!$._config.ssd.enabled,
.addRow(
$.row('Query Scheduler')
.addPanel(
$.containerCPUUsagePanel('CPU', 'query-scheduler'),
Expand All @@ -62,9 +56,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.goHeapInUsePanel('Memory (go heap inuse)', 'query-scheduler'),
)
)
.addRowIf(
!$._config.ssd.enabled,
grafana.row.new('Querier')
.addRow(
$.row('Querier')
.addPanel(
$.containerCPUUsagePanel('CPU', 'querier'),
)
Expand Down Expand Up @@ -94,24 +87,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'querier'),
)
)
// Add the read path for single scalable deployment only. The read path should not display disk utilization as the index gateway is present in the backend pods.
.addRowIf(
$._config.ssd.enabled,
grafana.row.new('Read path')
.addPanel(
$.CPUUsagePanel('CPU', index_gateway_pod_matcher),
)
.addPanel(
$.memoryWorkingSetPanel('Memory (workingset)', index_gateway_pod_matcher),
)
.addPanel(
$.goHeapInUsePanel('Memory (go heap inuse)', index_gateway_job_matcher),
)
)
// Otherwise we add the index gateway information
.addRowIf(
!$._config.ssd.enabled,
grafana.row.new('Index Gateway')
.addRow(
$.row('Index Gateway')
.addPanel(
$.CPUUsagePanel('CPU', index_gateway_pod_matcher),
)
Expand Down Expand Up @@ -141,9 +119,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.containerDiskSpaceUtilizationPanel('Disk Space Utilization', index_gateway_job_matcher),
)
)
.addRowIf(
!$._config.ssd.enabled,
grafana.row.new('Bloom Gateway')
.addRow(
$.row('Bloom Gateway')
.addPanel(
$.containerCPUUsagePanel('CPU', 'bloom-gateway'),
)
Expand Down Expand Up @@ -185,9 +162,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher),
)
)
.addRowIf(
!$._config.ssd.enabled,
grafana.row.new('Ruler')
.addRow(
$.row('Ruler')
.addPanel(
$.newQueryPanel('Rules') +
$.queryPanel(
Expand Down
107 changes: 107 additions & 0 deletions production/loki-mixin/dashboards/loki-resources-overview.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Resources overview dashboard for the simple scalable deployment (SSD) mode.
//
// The dashboard is only generated when `$._config.ssd.enabled` is true; otherwise this
// mixin contributes no dashboards. It:
//   - replaces the loki-reads-resources dashboard
//   - replaces the loki-writes-resources dashboard
//   - adds resource panels for the backend pods
(import 'dashboard-utils.libsonnet') {
  // Pod matchers select the `loki` container of the read/write/backend pods, job matchers
  // select the corresponding jobs. All of them are derived from the configurable
  // `$._config.ssd.pod_prefix_matcher`.
  local read_pod_matcher = 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher,
  local read_job_matcher = '%s-read' % $._config.ssd.pod_prefix_matcher,

  local write_pod_matcher = 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher,
  local write_job_matcher = '%s-write' % $._config.ssd.pod_prefix_matcher,

  local backend_pod_matcher = 'container="loki", pod=~"%s-backend.*"' % $._config.ssd.pod_prefix_matcher,
  local backend_job_matcher = '%s-backend' % $._config.ssd.pod_prefix_matcher,

  grafanaDashboards+:: if !$._config.ssd.enabled then {} else {
    'loki-resources-overview.json':
      ($.dashboard('Loki / Resources Overview', uid='resources-overview'))
      .addCluster()
      .addNamespace()
      .addTag()
      .addRow(
        // The read path does not display disk utilization as the index gateway is present in the backend pods.
        $.row('Read path')
        .addPanel(
          $.CPUUsagePanel('CPU', read_pod_matcher),
        )
        .addPanel(
          $.memoryWorkingSetPanel('Memory (workingset)', read_pod_matcher),
        )
        .addPanel(
          $.goHeapInUsePanel('Memory (go heap inuse)', read_job_matcher),
        )
      )
      .addRow(
        $.row('Write path')
        .addPanel(
          $.newQueryPanel('In-memory streams') +
          $.queryPanel(
            // The metric keeps its `loki_ingester_` prefix; only the job matcher
            // is specific to the write pods.
            'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher(write_job_matcher)],
            '{{%s}}' % $._config.per_instance_label
          ) +
          {
            tooltip: { sort: 2 },  // Sort descending.
          },
        )
        .addPanel(
          $.CPUUsagePanel('CPU', write_pod_matcher),
        )
        .addPanel(
          $.memoryWorkingSetPanel('Memory (workingset)', write_pod_matcher),
        )
        .addPanel(
          $.goHeapInUsePanel('Memory (go heap inuse)', write_job_matcher),
        )
        .addPanel(
          $.newQueryPanel('Disk Writes', 'Bps') +
          $.queryPanel(
            'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(write_pod_matcher)],
            '{{%s}} - {{device}}' % $._config.per_instance_label
          ) +
          $.withStacking,
        )
        .addPanel(
          $.newQueryPanel('Disk Reads', 'Bps') +
          $.queryPanel(
            'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(write_pod_matcher)],
            '{{%s}} - {{device}}' % $._config.per_instance_label
          ) +
          $.withStacking,
        )
        .addPanel(
          $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', write_job_matcher),
        )
      )
      .addRow(
        $.row('Backend path')
        .addPanel(
          $.CPUUsagePanel('CPU', backend_pod_matcher),
        )
        .addPanel(
          $.memoryWorkingSetPanel('Memory (workingset)', backend_pod_matcher),
        )
        .addPanel(
          $.goHeapInUsePanel('Memory (go heap inuse)', backend_job_matcher),
        )
        .addPanel(
          $.newQueryPanel('Disk Writes', 'Bps') +
          $.queryPanel(
            'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(backend_pod_matcher)],
            '{{%s}} - {{device}}' % $._config.per_instance_label
          ) +
          $.withStacking,
        )
        .addPanel(
          $.newQueryPanel('Disk Reads', 'Bps') +
          $.queryPanel(
            'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(backend_pod_matcher)],
            '{{%s}} - {{device}}' % $._config.per_instance_label
          ) +
          $.withStacking,
        )
        .addPanel(
          $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', backend_job_matcher),
        )
      ),
  },
}
2 changes: 0 additions & 2 deletions production/loki-mixin/dashboards/loki-retention.libsonnet
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
local utils = import 'mixin-utils/utils.libsonnet';

(import 'dashboard-utils.libsonnet') {
local compactor_pod_matcher = if $._config.meta_monitoring.enabled
then 'pod=~"(compactor.*|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
Expand Down
Loading

0 comments on commit af73de5

Please sign in to comment.