From 0aaa5c6527163656be5a859ec459903dc67f79b6 Mon Sep 17 00:00:00 2001 From: Leif Madsen Date: Thu, 7 Dec 2023 15:18:34 -0500 Subject: [PATCH] Implement dashboard management (#548) * Implement dashboard management Implement a new configuration option graphing.grafana.dashboards.enabled which results in dashboards objects being created for the Grafana Operator. Previously loading dashboards would be done manually via 'oc apply' using instructions from documentation. The new CRD parameters to the ServiceTelemetry object allows the Service Telemetry Operator to now make the GrafanaDashboard objects directly. Related: OSPRH-825 * Drop unnecessary cluster roles * Update CSV for owned parameter --- .../infra.watch_servicetelemetrys_crd.yaml | 9 +- ...fra.watch_v1beta1_servicetelemetry_cr.yaml | 5 +- .../infra.watch_servicetelemetrys_crd.yaml | 10 +- ...emetry-operator.clusterserviceversion.yaml | 6 +- roles/servicetelemetry/defaults/main.yml | 6 +- .../files/memcached-dashboard.json | 1513 ++++++++++++ .../files/rhos-cloud-dashboard.json | 1752 +++++++++++++ .../files/rhos-dashboard.json | 2179 +++++++++++++++++ .../files/virtual-machine-view.json | 1112 +++++++++ .../tasks/component_grafana.yml | 71 + 10 files changed, 6657 insertions(+), 6 deletions(-) create mode 100644 roles/servicetelemetry/files/memcached-dashboard.json create mode 100644 roles/servicetelemetry/files/rhos-cloud-dashboard.json create mode 100644 roles/servicetelemetry/files/rhos-dashboard.json create mode 100644 roles/servicetelemetry/files/virtual-machine-view.json diff --git a/deploy/crds/infra.watch_servicetelemetrys_crd.yaml b/deploy/crds/infra.watch_servicetelemetrys_crd.yaml index 286d2c74b..c29fe03ce 100644 --- a/deploy/crds/infra.watch_servicetelemetrys_crd.yaml +++ b/deploy/crds/infra.watch_servicetelemetrys_crd.yaml @@ -289,7 +289,7 @@ spec: description: Whether to disable the Grafana signout menu type: boolean ingressEnabled: - description: Enable ingress access to Grafana + description: Whether to enable ingress access to Grafana type: boolean adminPassword: description: Grafana admin password @@ -301,6 +301,13 @@ spec: baseImage: description: Path to the base container image used to instantiate a Grafana instance type: string + dashboards: + description: Dashboard configurations for Grafana + properties: + enabled: + description: Whether to enable built-in dashboards provided by Service Telemetry Framework + type: boolean + type: object type: object type: object cloudsRemoveOnMissing: diff --git a/deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml b/deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml index 8b4cf7142..ee728e2f4 100644 --- a/deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml +++ b/deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml @@ -79,10 +79,13 @@ spec: graphing: enabled: false grafana: - ingressEnabled: false + ingressEnabled: true adminPassword: secret adminUser: root disableSignoutMenu: false + baseImage: registry.redhat.io/rhel8/grafana:7 + dashboards: + enabled: true transports: qdr: enabled: true diff --git a/deploy/olm-catalog/service-telemetry-operator/manifests/infra.watch_servicetelemetrys_crd.yaml b/deploy/olm-catalog/service-telemetry-operator/manifests/infra.watch_servicetelemetrys_crd.yaml index f26cbc7b9..545ffd994 100644 --- a/deploy/olm-catalog/service-telemetry-operator/manifests/infra.watch_servicetelemetrys_crd.yaml +++ b/deploy/olm-catalog/service-telemetry-operator/manifests/infra.watch_servicetelemetrys_crd.yaml @@ -396,11 +396,19 @@ spec: description: Path to the base container image used to instantiate a Grafana instance type: string + dashboards: + description: Dashboard configurations for Grafana + properties: + enabled: + description: Whether to enable built-in dashboards provided + by Service Telemetry Framework + type: boolean + type: object disableSignoutMenu: description: Whether to disable the Grafana signout menu type: boolean ingressEnabled: - description: Enable ingress access to Grafana + description: Whether to enable ingress access to Grafana type: boolean type: object type: object diff --git a/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml b/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml index 1e1fdc092..1052f8ba3 100644 --- a/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml +++ b/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml @@ -119,8 +119,12 @@ metadata: "grafana": { "adminPassword": "secret", "adminUser": "root", + "baseImage": "registry.redhat.io/rhel8/grafana:7", + "dashboards": { + "enabled": true + }, "disableSignoutMenu": false, - "ingressEnabled": false + "ingressEnabled": true } }, "highAvailability": { diff --git a/roles/servicetelemetry/defaults/main.yml b/roles/servicetelemetry/defaults/main.yml index e8e92d855..263480e00 100644 --- a/roles/servicetelemetry/defaults/main.yml +++ b/roles/servicetelemetry/defaults/main.yml @@ -82,11 +82,13 @@ servicetelemetry_defaults: graphing: enabled: false grafana: - ingress_enabled: false + ingress_enabled: true admin_password: secret admin_user: root disable_signout_menu: false - base_image: docker.io/grafana/grafana:8.1.2 + base_image: registry.redhat.io/rhel8/grafana:7 + dashboards: + enabled: true # 'clouds' object is not partially updatable like other objects. If 'clouds' # object is defined then the default is overwritten. diff --git a/roles/servicetelemetry/files/memcached-dashboard.json b/roles/servicetelemetry/files/memcached-dashboard.json new file mode 100644 index 000000000..e68a439b9 --- /dev/null +++ b/roles/servicetelemetry/files/memcached-dashboard.json @@ -0,0 +1,1513 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Tracking dashboard for memcached service", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 2, + "iteration": 1698247048278, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 16, + "panels": [], + "title": "Availability and connections", + "type": "row" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_connections{service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Current connections", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_uptime{service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_items{service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Items", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "pluginVersion": "8.0.4", + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_connections_total{service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Connection rate (1m)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_total_events_total{service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Max connections reached", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 14, + "panels": [], + "title": "System metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "unit": "Bps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:89", + "alias": "/Rx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_octets_tx_total{service=~\".+-$clouds-.+\"}[1m])", + "hide": false, + "interval": "", + "legendFormat": "Tx {{ host }}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_octets_rx_total{service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "Rx {{ host }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:62", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:63", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 42, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "collectd_libpodstats_pod_memory{plugin_instance=\"memcached\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "collectd_libpodstats_pod_cpu_percent{plugin_instance=\"memcached\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "CPU percent", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_libpodstats_pod_cpu_time_total{plugin_instance=\"memcached\",service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "CPU time", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 22, + "panels": [], + "title": "Cache performance", + "type": "row" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 3, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 36, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "(collectd_memcached_df_free{service=~\".+-$clouds-.+\"} + collectd_memcached_df_used{service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Total cache available", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_df_used{service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Cache usage over time", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 3, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_df_used{service=~\".+-$clouds-.+\"} / (collectd_memcached_df_free{service=~\".+-$clouds-.+\"} + collectd_memcached_df_used{service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Cache utilization", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 33, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_command_total{type_instance=\"get\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Total gets", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 35, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_command_total{type_instance=\"set\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Total sets", + "type": "stat" + }, + { + "datasource": null, + "description": "This is a calculated metric: get_hits / cmd_get. It indicates how efficient your Memcached server is.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_ops_total{type_instance=\"hits\",service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Hit rate", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_ops_total{type_instance=\"misses\",service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Miss rate", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 32, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_command_total{type_instance=\"flush\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Total flushes", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 40, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "collectd_memcached_memcached_ops_total{type_instance=\"evictions\",service=~\".+-$clouds-.+\"}", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Total evictions", + "type": "stat" + }, + { + "datasource": null, + "description": "The flush_all command invalidates all items in the database. This operation incurs a performance penalty and shouldn’t take place in production, so check your debug scripts.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_command_total{type_instance=\"flush\",service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Flush rate", + "type": "timeseries" + }, + { + "datasource": null, + "description": "An eviction is when an item that still has time to live is removed from the cache because a brand new item needs to be allocated.\nThe item is selected with a pseudo-LRU mechanism.\nA high number of evictions coupled with a low hit rate means your application is setting a large number of keys that are never used again.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(collectd_memcached_memcached_ops_total{type_instance=\"evictions\",service=~\".+-$clouds-.+\"}[1m])", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Eviction rate", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "isNone": true, + "selected": true, + "text": "None", + "value": "" + }, + "datasource": null, + "definition": "label_values(collectd_memcached_percent, service)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "cloud", + "multi": false, + "name": "clouds", + "options": [], + "query": { + "query": "label_values(collectd_memcached_percent, service)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.+-(.+)-coll-meter/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Memcached View", + "uid": "VHbfxjinz", + "version": 3 +} diff --git a/roles/servicetelemetry/files/rhos-cloud-dashboard.json b/roles/servicetelemetry/files/rhos-cloud-dashboard.json new file mode 100644 index 000000000..d3ed49146 --- /dev/null +++ b/roles/servicetelemetry/files/rhos-cloud-dashboard.json @@ -0,0 +1,1752 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 3, + "iteration": 1695784064538, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [ + "cloud-dashboards" + ], + "targetBlank": true, + "title": "Cloud Dashboards", + "tooltip": "", + "type": "dashboards", + "url": "" + } + ], + "panels": [ + { + "cacheTimeout": null, + "cards": { + "cardPadding": 0, + "cardRound": null + }, + "color": { + "cardColor": "#37872D", + "colorScale": "linear", + "colorScheme": "interpolateReds", + "exponent": 0.5, + "max": 1, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 15, + "x": 0, + "y": 0 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 21, + "interval": "10m", + "legend": { + "show": false + }, + "links": [], + "pluginVersion": "6.5.1", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": " avg(sensubility_container_health_status{process=\"glance_api\", service=~\".+-$clouds-.+\"})", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "glance_api", + "refId": "D" + }, + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"nova_api\", service=~\".+-$clouds-.+\"})", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "nova_api", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + }, + { + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"heat_api\", service=~\".+-$clouds-.+\"})", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "heat_api", + "refId": "B" + }, + { + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"neutron_api\", service=~\".+-$clouds-.+\"})", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "neutron_api", + "refId": "C" + }, + { + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"placement_api\", service=~\".+-$clouds-.+\"})", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "placement_api", + "refId": "E" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "2m", + "yAxis": { + "decimals": null, + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "middle", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 0 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($top, sum by (plugin_instance, host) (collectd_libpodstats_pod_memory{service=~\".+-$clouds-.+\"}))", + "legendFormat": "{{plugin_instance}} on {{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top $top Memory Consumers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 6 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($top, avg_over_time(collectd_libpodstats_pod_cpu_percent{service=~\".+-$clouds-.+\"}[10m]))", + "legendFormat": "{{plugin_instance}} on {{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top $top CPU Consumers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:169", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "$$hashKey": "object:170", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 8 + }, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"placement_api\", service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime placement_api", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 8 + }, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"neutron_api\", service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime neutron_api", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 8 + }, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"heat_api\", service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime heat_api", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 8 + }, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"nova_api\", service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime nova_api", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 8 + }, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "startsAt", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "exemplar": true, + "expr": "avg(sensubility_container_health_status{process=\"glance_api\", service=~\".+-$clouds-.+\"})", + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "startsAt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime glance_api", + "type": "stat" + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 6, + "panels": [], + "title": "Service Resource Usage", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "collectd_libpodstats_pod_cpu_percent{plugin_instance=\"horizon\", service=~\".+-$clouds-.+\"}", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Horizon CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:235", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "$$hashKey": "object:236", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "collectd_libpodstats_pod_memory{plugin_instance=\"horizon\", service=~\".+-$clouds-.+\"}", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Horizon Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (host) (collectd_libpodstats_pod_cpu_percent{plugin_instance=~\"nova.*\", service=~\".+-$clouds-.+\"})", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Nova CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:293", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "$$hashKey": "object:294", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 23 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (host) (collectd_libpodstats_pod_memory{plugin_instance=~\"nova.*\", service=~\".+-$clouds-.+\"})", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Nova Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (host) (collectd_libpodstats_pod_cpu_percent{plugin_instance=~\"ceilometer.*\", service=~\".+-$clouds-.+\"})", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ceilometer CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:465", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "$$hashKey": "object:466", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (host) (collectd_libpodstats_pod_memory{plugin_instance=~\"ceilometer.*\", service=~\".+-$clouds-.+\"})", + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ceilometer Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 23, + "panels": [], + "title": "Instances", + "type": "row" + }, + { + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a", + "#4040a0" + ], + "datasource": "STFPrometheus", + "description": "Click instance for drill down view", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 17, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "polystat": { + "animationSpeed": 2500, + "columnAutoSize": true, + "columns": "", + "defaultClickThrough": "", + "defaultClickThroughNewTab": false, + "defaultClickThroughSanitize": false, + "displayLimit": 100, + "fontAutoColor": true, + "fontAutoScale": true, + "fontSize": 12, + "fontType": "Roboto", + "globalDecimals": 2, + "globalDisplayMode": "all", + "globalDisplayTextTriggeredEmpty": "OK", + "globalOperatorName": "avg", + "globalUnitFormat": "short", + "gradientEnabled": true, + "hexagonSortByDirection": 1, + "hexagonSortByField": "name", + "maxMetrics": 0, + "polygonBorderColor": "black", + "polygonBorderSize": 2, + "polygonGlobalFillColor": "#FFF899", + "radius": "", + "radiusAutoSize": true, + "rowAutoSize": true, + "rows": "", + "shape": "hexagon_pointed_top", + "tooltipDisplayMode": "all", + "tooltipDisplayTextTriggeredEmpty": "OK", + "tooltipFontSize": 12, + "tooltipFontType": "Roboto", + "tooltipPrimarySortDirection": 2, + "tooltipPrimarySortField": "thresholdLevel", + "tooltipSecondarySortDirection": 2, + "tooltipSecondarySortField": "value", + "tooltipTimestampEnabled": true, + "valueEnabled": true + }, + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "projects", + "repeatDirection": "h", + "savedComposites": [], + "savedOverrides": [], + "targets": [ + { + "exemplar": true, + "expr": "sum by (resource, plugin_instance) (label_replace(collectd_virt_memory{service=~\".+-$clouds-.+\"}, \"resource\", \"$1\", \"host\", \".+:(.+):.+\")) + on(resource) group_right(plugin_instance) ceilometer_cpu{project=\"$projects\", service=~\".+-$clouds-.+\"}", + "instant": true, + "interval": "", + "legendFormat": "{{plugin_instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Project $projects", + "type": "grafana-polystat-panel", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ] + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "cloud-dashboards" + ], + "templating": { + "list": [ + { + "allValue": null, + "datasource": null, + "definition": "label_values(collectd_cpu_percent, service)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "cloud", + "multi": false, + "name": "clouds", + "options": [], + "query": { + "query": "label_values(collectd_cpu_percent, service)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.+-(.+)-coll-meter/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "STFPrometheus", + "definition": "label_values(ceilometer_cpu{service=~\".+-$clouds-.+\"},project)", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "projects", + "options": [], + "query": { + "query": "label_values(ceilometer_cpu{service=~\".+-$clouds-.+\"},project)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "5", + "value": "5" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "top", + "options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": false, + "text": "2", + "value": "2" + }, + { + "selected": false, + "text": "3", + "value": "3" + }, + { + "selected": false, + "text": "4", + "value": "4" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "6", + "value": "6" + }, + { + "selected": false, + "text": "7", + "value": "7" + }, + { + "selected": false, + "text": "8", + "value": "8" + }, + { + "selected": false, + "text": "9", + "value": "9" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "11", + "value": "11" + }, + { + "selected": false, + "text": "12", + "value": "12" + }, + { + "selected": false, + "text": "13", + "value": "13" + }, + { + "selected": false, + "text": "14", + "value": "14" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "16", + "value": "16" + }, + { + "selected": false, + "text": "17", + "value": "17" + }, + { + "selected": false, + "text": "18", + "value": "18" + }, + { + "selected": false, + "text": "19", + "value": "19" + }, + { + "selected": false, + "text": "20", + "value": "20" + } + ], + "query": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Cloud View", + "uid": "IHqhpjPZz", + "version": 15 +} diff --git a/roles/servicetelemetry/files/rhos-dashboard.json b/roles/servicetelemetry/files/rhos-dashboard.json new file mode 100644 index 000000000..871f02366 --- /dev/null +++ b/roles/servicetelemetry/files/rhos-dashboard.json @@ -0,0 +1,2179 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 2, + "iteration": 1695783546006, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 68, + "panels": [], + "title": "Quickview", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Node Active" + }, + "1": { + "text": "Node Inactive" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#37872D", + "value": null + }, + { + "color": "#C4162A", + "value": 1 + }, + { + "color": "#C4162A", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "absent({host = '$hosts', service=~\".+-$clouds-.+\"}) or label_replace(vector(0), \"host\", \"$hosts\", \"host\", \".*\")", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "Time node has been operational", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 2, + "x": 2, + "y": 1 + }, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "collectd_uptime{host=\"$hosts\", service=~\".+-$clouds-.+\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "0.0%", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "#d44a3a", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 4, + "y": 1 + }, + "id": 19, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum(collectd_cpu_percent{type_instance!=\"idle\", host=\"$hosts\", service=~\".+-$clouds-.+\"}) / count(sum by (host,plugin_instance) (collectd_cpu_percent{host=\"$hosts\", service=~\".+-$clouds-.+\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "0.0%", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "#d44a3a", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 7, + "y": 1 + }, + "id": 44, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum(collectd_memory{type_instance=\"used\",host=\"$hosts\", service=~\".+-$clouds-.+\"})/ sum(collectd_memory{host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{memory}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 10, + "y": 1 + }, + "id": 41, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum(collectd_df_df_complex{host=\"$hosts\",type_instance=\"used\", service=~\".+-$clouds-.+\"}) by (plugin_instance) / sum(collectd_df_df_complex{host=\"$hosts\", service=~\".+-$clouds-.+\"}) by (plugin_instance)", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{plugin_instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "File Systems", + "type": "bargauge" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 13, + "y": 1 + }, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "delta" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum(collectd_interface_if_errors_rx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}) + sum(collectd_interface_if_errors_tx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{plugin_instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Interface Errors", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "Load average represents the average number of running and un-interruptable processes residing in the kernel's execution queue. \n\nTypically, short term, midterm, and long term series give running averages of 1m, 5m, and 15m, respectively. ", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "collectd_load_shortterm{host=\"$hosts\", service=~\".+-$clouds-.+\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "short term", + "refId": "A" + }, + { + "expr": "collectd_load_midterm{host=\"$hosts\", service=~\".+-$clouds-.+\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "mid term", + "refId": "B" + }, + { + "expr": "collectd_load_longterm{host=\"$hosts\", service=~\".+-$clouds-.+\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "long term", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": "Processes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 37, + "panels": [], + "title": "Network Interfaces", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(collectd_interface_if_octets_rx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "legendFormat": "Rx {{plugin_instance}}", + "refId": "A" + }, + { + "expr": "rate(collectd_interface_if_octets_tx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "legendFormat": "Tx {{plugin_instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Data", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 56, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(collectd_interface_if_errors_rx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Rx {{plugin_instance}}", + "refId": "A" + }, + { + "expr": "rate(collectd_interface_if_errors_tx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "legendFormat": "Tx {{plugin_instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Rates", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "errors/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(collectd_interface_if_dropped_rx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Rx {{plugin_instance}}", + "refId": "A" + }, + { + "expr": "rate(collectd_interface_if_dropped_tx_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m])", + "legendFormat": "Tx {{plugin_instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Drop Rates", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 21, + "panels": [], + "title": "CPU", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "Average non-idle CPU activity of all cores on node", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(collectd_cpu_percent{type_instance!=\"idle\", host=\"$hosts\", service=~\".+-$clouds-.+\"}) / count(sum by (type_instance) (collectd_cpu_percent{type_instance!=\"idle\",host=\"$hosts\", service=~\".+-$clouds-.+\"}))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Aggr. Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "Shows average time spent for each activity across all cores", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(collectd_cpu_percent{type_instance!=\"idle\", host=\"$hosts\", service=~\".+-$clouds-.+\"}) by (type_instance) / count(collectd_cpu_percent{host=\"$hosts\", service=~\".+-$clouds-.+\"}) by (type_instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type_instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Aggr. Usage by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 25, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "decimals": null, + "description": "Memory used on node", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(collectd_memory{type_instance=\"used\",host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 9, + "x": 12, + "y": 25 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(collectd_hugepages_vmpage_number{type_instance=\"used\",host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{hugepages}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Huge Pages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "0.0%", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 25 + }, + "id": 71, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum(collectd_hugepages_vmpage_number{type_instance=\"used\",host=\"$hosts\", service=~\".+-$clouds-.+\"}) / sum(collectd_hugepages_vmpage_number{host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{hugepages}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Huge Pages (%)", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 11, + "panels": [], + "title": "File System", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "STFPrometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 31 + }, + "id": 51, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "expr": "sum by (plugin_instance) (collectd_df_df_inodes{type_instance=\"used\", host=\"$hosts\", service=~\".+-$clouds-.+\"}) / sum by (plugin_instance) (collectd_df_df_inodes{host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{plugin_instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Inode Usage", + "type": "bargauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 18, + "x": 6, + "y": 31 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (plugin_instance) (collectd_df_df_complex{type_instance!~\"free\",host=\"$hosts\", service=~\".+-$clouds-.+\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{plugin_instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File System Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "STFPrometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 70, + "panels": [], + "title": "Disk", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "decimals": 2, + "description": "10m rolling average", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(collectd_disk_disk_octets_read_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "B" + }, + { + "expr": "sum(rate(collectd_disk_disk_octets_write_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "decimals": 2, + "description": "Approximate percentage of total disk bandwidth being used.\n\nWeighted I/O includes the backlog that may be accumulating.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 40 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(collectd_disk_disk_io_time_io_time_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[1h]))/1000", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "i/o", + "refId": "A" + }, + { + "expr": "sum(rate(collectd_disk_disk_io_time_weighted_io_time_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[1h]))/1000", + "legendFormat": "weighted i/o", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "decimals": null, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(collectd_disk_disk_ops_read_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "sum(rate(collectd_disk_disk_ops_write_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Operations/s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "STFPrometheus", + "description": "Average time each I/O operation took to complete. Per the collectd disk plugin docs (https://collectd.org/wiki/index.php/Plugin:Disk), this average is not very accurate.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 46 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(collectd_disk_disk_time_read_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "sum(rate(collectd_disk_disk_time_write_total{host=\"$hosts\", service=~\".+-$clouds-.+\"}[10m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Avg. I/O Operation Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "datasource": null, + "definition": "label_values(collectd_cpu_percent, service)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "cloud", + "multi": false, + "name": "clouds", + "options": [], + "query": { + "query": "label_values(collectd_cpu_percent, service)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.+-(.+)-coll-meter/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "datasource": "STFPrometheus", + "definition": "label_values(collectd_cpu_percent{service=~\".+-$clouds-.+\"}, host)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "node", + "multi": false, + "name": "hosts", + "options": [], + "query": { + "query": "label_values(collectd_cpu_percent{service=~\".+-$clouds-.+\"}, host)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Infrastructure Node View", + "uid": "1F1OJZEWz", + "version": 4 +} diff --git a/roles/servicetelemetry/files/virtual-machine-view.json b/roles/servicetelemetry/files/virtual-machine-view.json new file mode 100644 index 000000000..0d5b4a191 --- /dev/null +++ b/roles/servicetelemetry/files/virtual-machine-view.json @@ -0,0 +1,1112 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 4, + "iteration": 1695785660982, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "count((ceilometer_cpu{project=\"$project\"}))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Virtual Machine Instances", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "pluginVersion": "8.0.4", + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(collectd_virt_percent, \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (avg by (resource,project) (ceilometer_cpu{project=\"$project\"}) * 0)", + "format": "time_series", + "interval": "", + "legendFormat": "{{ plugin_instance }} on {{ node }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "VM CPU %", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "pluginVersion": "8.0.4", + "targets": [ + { + "exemplar": true, + "expr": "rate(ceilometer_cpu{project=\"$project\"}[1m])", + "interval": "", + "legendFormat": "{{ resource }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Time for Instances", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "plugin_instance" + }, + "properties": [ + { + "id": "displayName", + "value": "Virtual Machines" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Value/" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "auto" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 3 + }, + "id": 16, + "options": { + "showHeader": true + }, + "pluginVersion": "7.5.15", + "targets": [ + { + "exemplar": true, + "expr": "(ceilometer_cpu{project=\"$project\"}) + on (resource) group_right(project) label_replace(label_replace(collectd_virt_virt_cpu_total_total, \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"Node\", \"$1\", \"host\", \".+:.+:(.+)\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{ plugin_instance }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "plugin_instance", + "Node" + ] + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": null, + "description": "Memory utilization of that allocated to the virtual machine.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true + }, + "links": [], + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 6 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "pluginVersion": "8.0.4", + "targets": [ + { + "exemplar": true, + "expr": "(label_replace(label_replace(collectd_virt_memory, \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") / 1000000) / on (resource) group_left (project) ceilometer_memory_usage{project=\"$project\"}", + "format": "time_series", + "interval": "", + "legendFormat": "{{ plugin_instance }} [{{ type_instance }}] on {{ node }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "VM Memory Utilization (Allocated)", + "type": "timeseries" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Virtual machine disk operations rate (in operations/second)", + "fieldConfig": { + "defaults": { + "links": [], + "unit": "decbytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 6 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_disk_ops_read_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Read {{ plugin_instance }} disk {{ type_instance }}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_disk_ops_write_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Write {{ plugin_instance }} disk {{ type_instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "VM Disk Operations Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Virtual machine network dropped packet rate (in packets-per-second)", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 12 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:140", + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_dropped_rx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Rx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_dropped_tx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Tx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "VM Network Dropped Packet Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Virtual machine network error rate (in packets-per-second)", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 12 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:201", + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_errors_rx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Rx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_errors_tx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Tx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "VM Network Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Virtual machine disk throughput rate (in bytes)", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 17 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:322", + "alias": "/Write/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_disk_octets_read_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Read {{ plugin_instance }} disk {{ type_instance }}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_disk_octets_write_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Write {{ plugin_instance }} disk {{ type_instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "VM Disk Throughput Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Virtual machine network throughput rate (in bytes)", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 17 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.15", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:383", + "alias": "/Tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_octets_rx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Rx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "label_replace(label_replace(rate(collectd_virt_if_octets_tx_total[1m]), \"resource\", \"$1\", \"host\", \".+:(.+):.+\"), \"node\", \"$1\", \"host\", \".+:.+:(.+)\") + on (resource) group_left(project) (ceilometer_cpu{project=\"$project\"} * 0)", + "interval": "", + "legendFormat": "Tx {{ plugin_instance }} interface {{ type_instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "VM Network Throughput Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "datasource": null, + "definition": "label_values(service)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "cloud", + "multi": false, + "name": "clouds", + "options": [], + "query": { + "query": "label_values(service)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.+-(.+)-coll-meter/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "datasource": null, + "definition": "label_values(ceilometer_cpu{service=~\".+-$clouds-.+\"}, project)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "project", + "multi": false, + "name": "project", + "options": [], + "query": { + "query": "label_values(ceilometer_cpu{service=~\".+-$clouds-.+\"}, project)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Virtual Machine View", + "uid": "JJzvn8mnz", + "version": 4 +} diff --git a/roles/servicetelemetry/tasks/component_grafana.yml b/roles/servicetelemetry/tasks/component_grafana.yml index 068507610..df012b70a 100644 --- a/roles/servicetelemetry/tasks/component_grafana.yml +++ b/roles/servicetelemetry/tasks/component_grafana.yml @@ -115,3 +115,74 @@ state: '{{ "present" if servicetelemetry_vars.graphing.enabled else "absent" }}' definition: '{{ ds_manifest }}' + + - name: Load Cloud Overview Dashboard + k8s: + state: '{{ "present" if servicetelemetry_vars.graphing.grafana.dashboards.enabled else "absent" }}' + definition: + apiVersion: integreatly.org/v1alpha1 + kind: GrafanaDashboard + metadata: + labels: + app: grafana + stf_owner: "{{ ansible_operator_meta.name }}" + name: rhos-cloud-dashboard-1 + namespace: "{{ ansible_operator_meta.namespace }}" + spec: + name: rhos-cloud-dashboard.json + plugins: + - name: grafana-polystat-panel + version: "1.2.11" + json: | + {{ lookup('file', 'rhos-cloud-dashboard.json') | string }} + + - name: Load Infrastructure Overview Dashboard + k8s: + state: '{{ "present" if servicetelemetry_vars.graphing.grafana.dashboards.enabled else "absent" }}' + definition: + apiVersion: integreatly.org/v1alpha1 + kind: GrafanaDashboard + metadata: + labels: + app: grafana + stf_owner: "{{ ansible_operator_meta.name }}" + name: rhos-dashboard-1 + namespace: "{{ ansible_operator_meta.namespace }}" + spec: + name: rhos-dashboard.json + json: | + {{ lookup('file', 'rhos-dashboard.json') | string }} + + - name: Load Memcached Dashboard + k8s: + state: '{{ "present" if servicetelemetry_vars.graphing.grafana.dashboards.enabled else "absent" }}' + definition: + apiVersion: integreatly.org/v1alpha1 + kind: GrafanaDashboard + metadata: + labels: + app: grafana + stf_owner: "{{ ansible_operator_meta.name }}" + name: memcached-dashboard-1 + namespace: "{{ ansible_operator_meta.namespace }}" + spec: + name: memcached-dashboard.json + json: | + {{ lookup('file', 'memcached-dashboard.json') | string }} + + - name: Load Virtual Machine View Dashboard + k8s: + state: '{{ "present" if servicetelemetry_vars.graphing.grafana.dashboards.enabled else "absent" }}' + definition: + apiVersion: integreatly.org/v1alpha1 + kind: GrafanaDashboard + metadata: + labels: + app: grafana + stf_owner: "{{ ansible_operator_meta.name }}" + name: virtual-machine-dashboard-1 + namespace: "{{ ansible_operator_meta.namespace }}" + spec: + name: virtual-machine-view.json + json: | + {{ lookup('file', 'virtual-machine-view.json') | string }}