Merge pull request #37 from GeorgianaElena/multiple-datasource

Support multiple datasource

yuvipanda authored Apr 21, 2022
2 parents 775a09f + 232c355 commit f63017a
Showing 4 changed files with 99 additions and 24 deletions.
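The change applies a single pattern across the dashboards: declare a PROMETHEUS_DS datasource template variable once, register it on the dashboard with .addTemplates(), and point every panel's datasource at '$PROMETHEUS_DS', so each dashboard can be viewed against any Prometheus datasource the viewer selects instead of a hard-coded one. A minimal sketch of the pattern follows; the import path, panel title, and the 'up' metric are illustrative assumptions, not taken from this diff.

// Sketch of the multiple-datasource pattern this commit applies.
// Assumes grafonnet-lib is vendored as 'grafonnet/grafana.libsonnet';
// the dashboard title, panel title, and 'up' metric are placeholders.
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local graphPanel = grafana.graphPanel;
local prometheus = grafana.prometheus;
local template = grafana.template;

// A dropdown variable listing all Prometheus datasources;
// hide='label' hides only the variable's label in the dashboard UI.
local templates = [
  template.datasource(
    name='PROMETHEUS_DS',
    query='prometheus',
    current=null,
    hide='label',
  ),
];

// Panels reference the variable instead of a hard-coded datasource.
local examplePanel = graphPanel.new(
  'Example panel',
  min=0,
  datasource='$PROMETHEUS_DS',
).addTarget(
  prometheus.target('up'),
);

dashboard.new(
  'Example dashboard',
  editable=true,
).addTemplates(
  templates
).addPanel(examplePanel, {})

Rendering locally with jsonnet -J vendor dashboards/cluster.jsonnet (assuming grafonnet-lib is vendored under ./vendor) should then show the variable under templating.list in the output JSON.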
31 changes: 30 additions & 1 deletion dashboards/cluster.jsonnet
@@ -11,12 +11,21 @@ local heatmapPanel = grafana.heatmapPanel;
local jupyterhub = import './jupyterhub.libsonnet';
local standardDims = jupyterhub.standardDims;

+local templates = [
+template.datasource(
+name='PROMETHEUS_DS',
+query='prometheus',
+current=null,
+hide='label',
+),
+];

// Cluster-wide stats
local userNodes = graphPanel.new(
'Node Count',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTarget(
prometheus.target(
expr='sum(kube_node_labels) by (label_cloud_google_com_gke_nodepool)',
@@ -32,6 +41,7 @@ local userPods = graphPanel.new(
decimals=0,
min=0,
stack=true,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -57,6 +67,7 @@ local clusterMemoryCommitment = graphPanel.new(
// but full is still full. This gets a better view of 'fullness' most of the time.
// If the commitment is "off the chart" it doesn't super matter by how much.
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -98,6 +109,7 @@ local clusterCPUCommitment = graphPanel.new(
// but full is still full. This gets a better view of 'fullness' most of the time.
// If the commitment is "off the chart" it doesn't super matter by how much.
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -138,6 +150,7 @@ local nodeCPUCommit = graphPanel.new(
// but full is still full. This gets a better view of 'fullness' most of the time.
// If the commitment is "off the chart" it doesn't super matter by how much.
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -171,6 +184,7 @@ local nodeMemoryCommit = graphPanel.new(
// but full is still full. This gets a better view most of the time.
// If the commitment is "off the chart" it doesn't super matter by how much.
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -203,6 +217,7 @@ local nodeMemoryUtil = graphPanel.new(
min=0,
// since this is actual measured utilization, it should not be able to exceed max=1
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -230,6 +245,7 @@ local nodeCPUUtil = graphPanel.new(
min=0,
// since this is actual measured utilization, it should not be able to exceed max=1
max=1,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -256,11 +272,11 @@ local nonRunningPods = graphPanel.new(
decimals=0,
legend_hideZero=true,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(kube_pod_status_phase{phase!="Running"}) by (phase)',
legendFormat='{{phase}}',
-
),
]);

@@ -270,6 +286,7 @@ local userNodesNFSOps = graphPanel.new(
'User Nodes NFS Ops',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_nfs_requests_total[5m])) by (kubernetes_node) > 0',
@@ -281,6 +298,7 @@ local userNodesIOWait = graphPanel.new(
'iowait % on each node',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_nfs_requests_total[5m])) by (kubernetes_node)',
@@ -292,6 +310,7 @@ local userNodesHighNFSOps = graphPanel.new(
'NFS Operation Types on user nodes',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_nfs_requests_total[5m])) by (method) > 0',
@@ -302,6 +321,7 @@
local nfsServerCPU = graphPanel.new(
'NFS Server CPU',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'avg(rate(node_cpu_seconds_total{job="prometheus-nfsd-server", mode!="idle"}[2m])) by (mode)',
@@ -313,6 +333,7 @@ local nfsServerIOPS = graphPanel.new(
'NFS Server Disk ops',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_nfsd_disk_bytes_read_total[5m]))',
@@ -327,6 +348,7 @@ local nfsServerIOPS = graphPanel.new(
local nfsServerWriteLatency = graphPanel.new(
'NFS Server disk write latency',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_disk_write_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_writes_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)',
@@ -337,6 +359,7 @@ local nfsServerWriteLatency = graphPanel.new(
local nfsServerReadLatency = graphPanel.new(
'NFS Server disk read latency',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(node_disk_read_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_reads_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)',
@@ -349,6 +372,7 @@ local prometheusMemory = graphPanel.new(
'Prometheus Memory (Working Set)',
formatY1='bytes',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(container_memory_working_set_bytes{pod=~"support-prometheus-server-.*", namespace="support"})'
@@ -358,6 +382,7 @@ local prometheusMemory = graphPanel.new(
local prometheusCPU = graphPanel.new(
'Prometheus CPU',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(container_cpu_usage_seconds_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))'
@@ -368,6 +393,7 @@ local prometheusDiskSpace = graphPanel.new(
'Prometheus Free Disk space',
formatY1='bytes',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(kubelet_volume_stats_available_bytes{namespace="support",persistentvolumeclaim="support-prometheus-server"})'
@@ -379,6 +405,7 @@ local prometheusNetwork = graphPanel.new(
formatY1='bytes',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'sum(rate(container_network_receive_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))',
@@ -394,6 +421,8 @@ dashboard.new(
'Cluster Information',
tags=['jupyterhub', 'kubernetes'],
editable=true
+).addTemplates(
+templates
).addPanel(
row.new('Cluster Stats'), {},
).addPanel(
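For reference, a datasource variable like the one above compiles into the dashboard JSON's templating.list. The following is a hand-written approximation of what grafonnet emits, not an exact render; in particular, hide='label' is assumed to map to the numeric 1, meaning only the variable's label is hidden while the dropdown stays visible.

// Approximate compiled form of the template.datasource(...) call above.
{
  templating: {
    list: [
      {
        name: 'PROMETHEUS_DS',
        type: 'datasource',   // tells Grafana to list datasources...
        query: 'prometheus',  // ...restricted to Prometheus ones
        current: null,
        hide: 1,              // assumed mapping of hide='label'
        options: [],
        refresh: 1,
        regex: '',
      },
    ],
  },
}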
19 changes: 18 additions & 1 deletion dashboards/jupyterhub.jsonnet
@@ -15,9 +15,15 @@ local jupyterhub = import 'jupyterhub.libsonnet';
local standardDims = jupyterhub.standardDims;

local templates = [
+template.datasource(
+name='PROMETHEUS_DS',
+query='prometheus',
+current=null,
+hide='label',
+),
template.new(
'hub',
-datasource='prometheus',
+datasource='$PROMETHEUS_DS',
query='label_values(kube_service_labels{service="hub"}, namespace)',
// Allow viewing dashboard for multiple combined hubs
includeAll=true,
@@ -31,6 +37,7 @@ local currentRunningUsers = graphPanel.new(
'Current running users',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -50,6 +57,7 @@ local userMemoryDistribution = heatmapPanel.new(
yAxis_format='bytes',
yAxis_min=0,
color_colorScheme='interpolateViridis',
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -73,6 +81,7 @@ local userCPUDistribution = heatmapPanel.new(
yAxis_format='percentunit',
yAxis_min=0,
color_colorScheme='interpolateViridis',
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -96,6 +105,7 @@ local userAgeDistribution = heatmapPanel.new(
yAxis_format='s',
yAxis_min=0,
color_colorScheme='interpolateViridis',
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -117,6 +127,7 @@ local hubResponseLatency = graphPanel.new(
'Hub response latency',
formatY1='s',
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
'histogram_quantile(0.99, sum(rate(jupyterhub_request_duration_seconds_bucket{app="jupyterhub", kubernetes_namespace=~"$hub"}[5m])) by (le))',
@@ -140,6 +151,7 @@ local serverStartTimes = graphPanel.new(
min=0,
points=true,
pointradius=2,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
// Metrics from hub seems to have `kubernetes_namespace` rather than just `namespace`
@@ -156,6 +168,7 @@ local usersPerNode = graphPanel.new(
'Users per node',
decimals=0,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -181,6 +194,7 @@ local nonRunningPods = graphPanel.new(
decimalsY1=0,
min=0,
stack=true,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -213,6 +227,7 @@ local oldUserpods = tablePanel.new(
col: 2,
desc: true,
},
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -246,6 +261,7 @@ local highCPUUserPods = tablePanel.new(
col: 2,
desc: true,
},
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -279,6 +295,7 @@ local highMemoryUsagePods = tablePanel.new(
col: 2,
desc: true,
},
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
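jupyterhub.jsonnet is the one dashboard that already had a template variable: the hub query variable now resolves through the selected datasource, so its namespace list follows whichever Prometheus the viewer picks. The two variables chain as in the sketch below, lifted from the hunk above; only the import line is an assumption.

// Chained template variables: 'hub' queries whichever datasource
// the PROMETHEUS_DS variable currently points at.
// Assumes grafonnet-lib is vendored as 'grafonnet/grafana.libsonnet'.
local template = (import 'grafonnet/grafana.libsonnet').template;

local templates = [
  template.datasource(
    name='PROMETHEUS_DS',
    query='prometheus',
    current=null,
    hide='label',
  ),
  template.new(
    'hub',
    datasource='$PROMETHEUS_DS',  // previously: datasource='prometheus'
    query='label_values(kube_service_labels{service="hub"}, namespace)',
    includeAll=true,  // allow viewing dashboard for multiple combined hubs
  ),
];

templates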
21 changes: 19 additions & 2 deletions dashboards/usage-stats.jsonnet
@@ -12,11 +12,21 @@ local heatmapPanel = grafana.heatmapPanel;

local standardDims = { w: 12, h: 12 };

+local templates = [
+template.datasource(
+name='PROMETHEUS_DS',
+query='prometheus',
+current={},
+hide='label',
+),
+];
+
local monthlyActiveUsers = graphPanel.new(
'Active users (over 30 days)',
bars=true,
lines=false,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
// Removes any pods caused by stress testing
@@ -44,6 +54,7 @@ local dailyActiveUsers = graphPanel.new(
bars=true,
lines=false,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
// count singleuser-server pods
@@ -71,6 +82,7 @@ local userDistribution = graphPanel.new(
lines=false,
min=0,
x_axis_mode='histogram',
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
// count singleuser-server pods
@@ -95,6 +107,7 @@ local currentRunningUsers = graphPanel.new(
legend_max=true,
legend_current=true,
min=0,
+datasource='$PROMETHEUS_DS'
).addTargets([
prometheus.target(
|||
@@ -112,8 +125,12 @@ dashboard.new(
uid='usage-dashboard',
tags=['jupyterhub'],
editable=true,
-time_from='now-30d'
-).addPanel(
+time_from='now-30d',
+).addTemplates(
+templates
+)
+
+.addPanel(
monthlyActiveUsers, {},
).addPanel(
dailyActiveUsers, {},