From ce1e53955ac5b621752106efbca16877833997f6 Mon Sep 17 00:00:00 2001 From: Alexander Brand Date: Fri, 22 Jun 2018 10:15:44 -0400 Subject: [PATCH 1/2] monitoring: use histograms for api latency and cycle time metrics Signed-off-by: Alexander Brand --- deployment/grafana/02-grafana-configmap.yaml | 275 ++++++++++++++++++- discovery/pkg/metrics/metrics.go | 56 ++-- docs/monitoring.md | 4 +- 3 files changed, 296 insertions(+), 39 deletions(-) diff --git a/deployment/grafana/02-grafana-configmap.yaml b/deployment/grafana/02-grafana-configmap.yaml index f1d68d63..87fa3158 100644 --- a/deployment/grafana/02-grafana-configmap.yaml +++ b/deployment/grafana/02-grafana-configmap.yaml @@ -61,7 +61,7 @@ data: "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1528906914777, + "iteration": 1529672072587, "links": [], "panels": [ { @@ -1224,7 +1224,7 @@ data: "x": 6, "y": 20 }, - "id": 13, + "id": 25, "legend": { "alignAsTable": false, "avg": false, @@ -1250,18 +1250,40 @@ data: "steppedLine": false, "targets": [ { - "expr": "avg(gimbal_discoverer_api_latency_ms{backendname=~\"$Backend\"}) by (backendname)", + "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{backendname}}", + "legendFormat": "{{kubernetes_pod_name}} 50%", "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 75%", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.9, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 90% ", + "refId": "B" + }, + { + "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 100% ", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Openstack API Latency", + "title": "Openstack API Latency: Load Balancers Endpoint", "tooltip": { "shared": true, "sort": 0, @@ -1300,7 +1322,6 @@ data: "dashLength": 10, "dashes": false, "datasource": "prometheus", - "description": "Time to process all items within a cluster", "fill": 1, "gridPos": { "h": 7, @@ -1308,6 +1329,219 @@ data: "x": 12, "y": 20 }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 50%", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, 
sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 75%", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 100%", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Openstack API Latency: Pools Endpoint", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 20 + }, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 50%", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 75%", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.9, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 90% ", + "refId": "B" + }, + { + "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 100% ", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Openstack API Latency: Listeners", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", 
+ "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "Time to process all items within a cluster", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 27 + }, "id": 11, "legend": { "alignAsTable": true, @@ -1333,13 +1567,34 @@ data: "steppedLine": false, "targets": [ { - "expr": "gimbal_discoverer_cycle_duration_ms{backendname=~\"$Backend\"} ", + "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 50%", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{backendname}}", + "legendFormat": "{{kubernetes_pod_name}} 75%", "refId": "A" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 90%", + "refId": "C" + }, + { + "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}} 100%", + "refId": "D" } ], "thresholds": [], @@ -1361,7 +1616,7 @@ data: }, "yaxes": [ { - "format": "ms", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -1459,7 +1714,7 @@ data: "timezone": "", "title": "Gimbal Discovery", "uid": "ex4WqmZmk", - "version": 21 + "version": 4 } envoy.json: | { diff --git a/discovery/pkg/metrics/metrics.go b/discovery/pkg/metrics/metrics.go index 2040648f..926964a1 100644 --- a/discovery/pkg/metrics/metrics.go +++ b/discovery/pkg/metrics/metrics.go @@ -30,21 +30,21 @@ type DiscovererMetrics struct { } const ( - ServiceEventTimestampGauge = "gimbal_service_event_timestamp" - EndpointsEventTimestampGauge = "gimbal_endpoints_event_timestamp" - ServiceErrorTotalCounter = "gimbal_service_error_total" - EndpointsErrorTotalCounter = "gimbal_endpoints_error_total" - QueueSizeGauge = "gimbal_queuesize" - DiscovererAPILatencyMSGauge = "gimbal_discoverer_api_latency_ms" - DiscovererCycleDurationMSGauge = "gimbal_discoverer_cycle_duration_ms" - DiscovererErrorTotal = "gimbal_discoverer_error_total" - DiscovererUpstreamServicesGauge = "gimbal_discoverer_upstream_services_total" - DiscovererReplicatedServicesGauge = "gimbal_discoverer_replicated_services_total" - DiscovererInvalidServicesGauge = "gimbal_discoverer_invalid_services_total" - DiscovererUpstreamEndpointsGauge = "gimbal_discoverer_upstream_endpoints_total" - DiscovererReplicatedEndpointsGauge = "gimbal_discoverer_replicated_endpoints_total" - DiscovererInvalidEndpointsGauge = "gimbal_discoverer_invalid_endpoints_total" - DiscovererInfoGauge = "gimbal_discoverer_info" + ServiceEventTimestampGauge = "gimbal_service_event_timestamp" + EndpointsEventTimestampGauge = "gimbal_endpoints_event_timestamp" + ServiceErrorTotalCounter = "gimbal_service_error_total" + 
EndpointsErrorTotalCounter              = "gimbal_endpoints_error_total"
+	QueueSizeGauge                          = "gimbal_queuesize"
+	DiscovererAPILatencyMsHistogram         = "gimbal_discoverer_api_latency_milliseconds"
+	DiscovererCycleDurationSecondsHistogram = "gimbal_discoverer_cycle_duration_seconds"
+	DiscovererErrorTotal                    = "gimbal_discoverer_error_total"
+	DiscovererUpstreamServicesGauge         = "gimbal_discoverer_upstream_services_total"
+	DiscovererReplicatedServicesGauge       = "gimbal_discoverer_replicated_services_total"
+	DiscovererInvalidServicesGauge          = "gimbal_discoverer_invalid_services_total"
+	DiscovererUpstreamEndpointsGauge        = "gimbal_discoverer_upstream_endpoints_total"
+	DiscovererReplicatedEndpointsGauge      = "gimbal_discoverer_replicated_endpoints_total"
+	DiscovererInvalidEndpointsGauge         = "gimbal_discoverer_invalid_endpoints_total"
+	DiscovererInfoGauge                     = "gimbal_discoverer_info"
 )
 
 // NewMetrics returns a map of Prometheus metrics
@@ -89,17 +89,19 @@ func NewMetrics(BackendType, BackendName string) DiscovererMetrics {
 			},
 			[]string{"backendname", "backendtype"},
 		),
-		DiscovererAPILatencyMSGauge: prometheus.NewGaugeVec(
-			prometheus.GaugeOpts{
-				Name: DiscovererAPILatencyMSGauge,
-				Help: "The milliseconds it takes for requests to return from a remote discoverer api",
+		DiscovererAPILatencyMsHistogram: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Name:    DiscovererAPILatencyMsHistogram,
+				Help:    "The milliseconds it takes for requests to return from a remote discoverer api",
+				Buckets: []float64{20, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 20000, 50000, 120000}, // milliseconds. largest bucket is 2 minutes.
 			},
 			[]string{"backendname", "backendtype", "path"},
 		),
-		DiscovererCycleDurationMSGauge: prometheus.NewGaugeVec(
-			prometheus.GaugeOpts{
-				Name: DiscovererCycleDurationMSGauge,
-				Help: "The milliseconds it takes for all objects to be synced from a remote discoverer api",
+		DiscovererCycleDurationSecondsHistogram: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Name:    DiscovererCycleDurationSecondsHistogram,
+				Help:    "The seconds it takes for all objects to be synced from a remote backend",
+				Buckets: prometheus.LinearBuckets(60, 60, 10), // 10 buckets, each 60 seconds wide (1 to 10 minutes)
 			},
 			[]string{"backendname", "backendtype"},
 		),
@@ -228,17 +230,17 @@ func (d *DiscovererMetrics) QueueSizeGaugeMetric(size int) {
 
 // CycleDurationMetric formats a cycle duration gauge prometheus metric
 func (d *DiscovererMetrics) CycleDurationMetric(duration time.Duration) {
-	m, ok := d.Metrics[DiscovererCycleDurationMSGauge].(*prometheus.GaugeVec)
+	m, ok := d.Metrics[DiscovererCycleDurationSecondsHistogram].(*prometheus.HistogramVec)
 	if ok {
-		m.WithLabelValues(d.BackendName, d.BackendType).Set(math.Floor(duration.Seconds() * 1e3))
+		m.WithLabelValues(d.BackendName, d.BackendType).Observe(math.Floor(duration.Seconds()))
 	}
 }
 
 // APILatencyMetric formats a cycle duration gauge prometheus metric
 func (d *DiscovererMetrics) APILatencyMetric(path string, duration time.Duration) {
-	m, ok := d.Metrics[DiscovererAPILatencyMSGauge].(*prometheus.GaugeVec)
+	m, ok := d.Metrics[DiscovererAPILatencyMsHistogram].(*prometheus.HistogramVec)
 	if ok {
-		m.WithLabelValues(d.BackendName, d.BackendType, path).Set(math.Floor(duration.Seconds() * 1e3))
+		m.WithLabelValues(d.BackendName, d.BackendType, path).Observe(math.Floor(duration.Seconds() * 1e3))
 	}
 }
 
diff --git a/docs/monitoring.md b/docs/monitoring.md
index b4f6b014..d1f29637 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -92,11 +92,11 @@ Detailed documentation on stats within Envoy is
available on their site: https:/ - **gimbal_queuesize (gauge):** Number of items in process queue with the following labels: - backendname - backendtype - - **gimbal_discoverer_api_latency_ms (gauge):** The milliseconds it takes for requests to return from a remote discoverer api (for example Openstack) + - **gimbal_discoverer_api_latency_milliseconds (histogram):** The milliseconds it takes for requests to return from a remote discoverer api (for example OpenStack) - backendname - backendtype - path: API request path - - **gimbal_discoverer_cycle_duration_ms (gauge):** The milliseconds it takes for all objects to be synced from a remote discoverer api (for example Openstack) + - **gimbal_discoverer_cycle_duration_seconds (histogram):** The seconds it takes for all objects to be synced from a remote backend (for example OpenStack) - backendname - backendtype - **gimbal_discoverer_api_error_total (counter):** Number of errors that have occurred when accessing the OpenStack API From 3787d0c33b9cc42e1a05aaff4360ea0e96043515 Mon Sep 17 00:00:00 2001 From: Alexander Brand Date: Mon, 25 Jun 2018 16:41:50 -0400 Subject: [PATCH 2/2] latency dashboard: display 50th and 99th percentile Signed-off-by: Alexander Brand --- deployment/grafana/02-grafana-configmap.yaml | 102 ++++--------------- 1 file changed, 18 insertions(+), 84 deletions(-) diff --git a/deployment/grafana/02-grafana-configmap.yaml b/deployment/grafana/02-grafana-configmap.yaml index 87fa3158..1595e434 100644 --- a/deployment/grafana/02-grafana-configmap.yaml +++ b/deployment/grafana/02-grafana-configmap.yaml @@ -61,7 +61,7 @@ data: "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1529672072587, + "iteration": 1529959091609, "links": [], "panels": [ { @@ -1226,8 +1226,8 @@ data: }, "id": 25, "legend": { - "alignAsTable": false, - "avg": false, + "alignAsTable": true, + "avg": true, "current": true, "max": false, "min": false, @@ -1252,32 +1252,16 @@ data: { "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", - "hide": false, "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}} 50%", "refId": "A" }, { - "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 75%", - "refId": "D" - }, - { - "expr": "histogram_quantile(0.9, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", + "expr": "histogram_quantile(0.99, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 90% ", + "legendFormat": "{{kubernetes_pod_name}} 99%", "refId": "B" - }, - { - "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*loadbalancers.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 100% ", - "refId": "C" } ], "thresholds": [], @@ -1331,8 +1315,8 @@ data: }, "id": 13, "legend": { - "alignAsTable": false, - "avg": false, + "alignAsTable": true, + "avg": true, "current": true, "max": false, "min": false, @@ -1357,34 +1341,16 @@ data: { "expr": 
"histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", - "hide": false, "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}} 50%", "refId": "A" }, { - "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 75%", - "refId": "D" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", + "expr": "histogram_quantile(0.99, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", - "hide": false, "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 90%", + "legendFormat": "{{kubernetes_pod_name}} 99%", "refId": "B" - }, - { - "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*pools.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 100%", - "refId": "C" } ], "thresholds": [], @@ -1438,8 +1404,8 @@ data: }, "id": 26, "legend": { - "alignAsTable": false, - "avg": false, + "alignAsTable": true, + "avg": true, "current": true, "max": false, "min": false, @@ -1464,32 +1430,16 @@ data: { "expr": "histogram_quantile(0.5, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", - "hide": false, "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}} 50%", "refId": "A" }, { - "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 75%", - "refId": "D" - }, - { - "expr": "histogram_quantile(0.9, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", + "expr": "histogram_quantile(0.99, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 90% ", + "legendFormat": "{{kubernetes_pod_name}} 99%", "refId": "B" - }, - { - "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_api_latency_milliseconds_bucket{path=~\".*listeners.*\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 100% ", - "refId": "C" } ], "thresholds": [], @@ -1545,7 +1495,7 @@ data: "id": 11, "legend": { "alignAsTable": true, - "avg": false, + "avg": true, "current": true, "max": false, "min": false, @@ -1571,30 +1521,14 @@ data: "format": "time_series", "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}} 50%", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.75, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 75%", "refId": "A" }, { - "expr": 
"histogram_quantile(0.90, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 90%", - "refId": "C" - }, - { - "expr": "histogram_quantile(1, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", + "expr": "histogram_quantile(0.99, sum(rate(gimbal_discoverer_cycle_duration_seconds_bucket{backendname=~\"$Backend\"}[5m])) by (le, kubernetes_pod_name))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}} 100%", - "refId": "D" + "legendFormat": "{{kubernetes_pod_name}} 99%", + "refId": "B" } ], "thresholds": [], @@ -1714,7 +1648,7 @@ data: "timezone": "", "title": "Gimbal Discovery", "uid": "ex4WqmZmk", - "version": 4 + "version": 2 } envoy.json: | {