From af2117e79c22f2a47c30a7ac7c540e4f792b6047 Mon Sep 17 00:00:00 2001 From: ekexium Date: Fri, 28 Jul 2023 16:43:35 +0800 Subject: [PATCH] metrics: Cherry pick #15118 to release-6.5: add min_safe_ts, min_safe_ts_region and min_safe_ts_gap (#15212) ref tikv/tikv#15082 metrics: add min_safe_ts, min_safe_ts_region and min_safe_ts_gap Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/advance.rs | 10 +- components/resolved_ts/src/endpoint.rs | 71 +- components/resolved_ts/src/metrics.rs | 25 +- components/resolved_ts/src/resolver.rs | 2 +- metrics/grafana/tikv_details.json | 6088 ++++++++++++------------ 5 files changed, 3197 insertions(+), 2999 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 3f1b254991b..779a5199012 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -48,7 +48,7 @@ use txn_types::TimeStamp; use crate::{endpoint::Task, metrics::*}; -const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s +pub(crate) const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096; @@ -61,6 +61,9 @@ pub struct AdvanceTsWorker { /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, + + // cache the last pd tso, used to approximate the next timestamp w/o an actual TSO RPC + pub(crate) last_pd_tso: Arc>>, } impl AdvanceTsWorker { @@ -85,6 +88,7 @@ impl AdvanceTsWorker { advance_ts_interval, timer: SteadyTimer::default(), concurrency_manager, + last_pd_tso: Arc::new(std::sync::Mutex::new(None)), } } } @@ -107,9 +111,13 @@ impl AdvanceTsWorker { self.advance_ts_interval, )); + let last_pd_tso = self.last_pd_tso.clone(); let fut = async move { // Ignore get tso errors since we will retry every `advdance_ts_interval`. let mut min_ts = pd_client.get_tso().await.unwrap_or_default(); + if let Ok(mut last_pd_tso) = last_pd_tso.try_lock() { + *last_pd_tso = Some((min_ts, Instant::now())); + } // Sync with concurrency manager so that it can work correctly when // optimizations like async commit is enabled. diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 8d2ee1631b4..f114fd89a9b 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -35,13 +35,16 @@ use tokio::sync::Notify; use txn_types::{Key, TimeStamp}; use crate::{ - advance::{AdvanceTsWorker, LeadershipResolver}, + advance::{AdvanceTsWorker, LeadershipResolver, DEFAULT_CHECK_LEADER_TIMEOUT_DURATION}, cmd::{ChangeLog, ChangeRow}, metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, }; +/// grace period for logging safe-ts and resolved-ts gap in slow log +const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; + enum ResolverStatus { Pending { tracked_index: u64, @@ -763,9 +766,18 @@ where let store_id = self.get_or_init_store_id(); let (mut oldest_ts, mut oldest_region, mut zero_ts_count) = (u64::MAX, 0, 0); let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); + let (mut oldest_safe_ts, mut oldest_safe_ts_region) = (u64::MAX, 0); self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { + let safe_ts = read_progress.safe_ts(); + if safe_ts > 0 && safe_ts < oldest_safe_ts { + oldest_safe_ts = safe_ts; + oldest_safe_ts_region = *region_id; + } + let (leader_info, leader_store_id) = read_progress.dump_leader_info(); + // this is maximum resolved-ts pushed to region_read_progress, namely candidates + // of safe_ts. It may not be the safe_ts yet let ts = leader_info.get_read_state().get_safe_ts(); if ts == 0 { zero_ts_count += 1; @@ -803,19 +815,62 @@ where } } } + // approximate a TSO from PD. It is better than local timestamp when clock skew + // exists. + let now: u64 = self + .advance_worker + .last_pd_tso + .try_lock() + .map(|opt| { + opt.map(|(pd_ts, instant)| { + pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 + }) + .unwrap_or_else(|| TimeStamp::physical_now()) + }) + .unwrap_or_else(|_| TimeStamp::physical_now()); + + RTS_MIN_SAFE_TS.set(oldest_safe_ts as i64); + RTS_MIN_SAFE_TS_REGION.set(oldest_safe_ts_region as i64); + let safe_ts_gap = now.saturating_sub(TimeStamp::from(oldest_safe_ts).physical()); + if safe_ts_gap + > self.cfg.advance_ts_interval.as_millis() + + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64 + + SLOW_LOG_GRACE_PERIOD_MS + { + let mut lock_num = None; + let mut min_start_ts = None; + if let Some(ob) = self.regions.get(&oldest_safe_ts_region) { + min_start_ts = ob + .resolver + .locks() + .keys() + .next() + .cloned() + .map(TimeStamp::into_inner); + lock_num = Some(ob.resolver.locks_by_key.len()); + } + info!( + "the max gap of safe-ts is large"; + "gap" => safe_ts_gap, + "oldest safe-ts" => ?oldest_safe_ts, + "region id" => oldest_safe_ts_region, + "advance-ts-interval" => ?self.cfg.advance_ts_interval, + "lock num" => lock_num, + "min start ts" => min_start_ts, + ); + } + RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); + RTS_MIN_RESOLVED_TS_REGION.set(oldest_region as i64); RTS_MIN_RESOLVED_TS.set(oldest_ts as i64); RTS_ZERO_RESOLVED_TS.set(zero_ts_count as i64); - RTS_MIN_RESOLVED_TS_GAP.set( - TimeStamp::physical_now().saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64, - ); + RTS_MIN_RESOLVED_TS_GAP + .set(now.saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64); RTS_MIN_LEADER_RESOLVED_TS_REGION.set(oldest_leader_region as i64); RTS_MIN_LEADER_RESOLVED_TS.set(oldest_leader_ts as i64); - RTS_MIN_LEADER_RESOLVED_TS_GAP.set( - TimeStamp::physical_now().saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) - as i64, - ); + RTS_MIN_LEADER_RESOLVED_TS_GAP + .set(now.saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) as i64); RTS_LOCK_HEAP_BYTES_GAUGE.set(lock_heap_size as i64); RTS_REGION_RESOLVE_STATUS_GAUGE_VEC diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 3ec35685c36..979da747fb1 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -38,7 +38,7 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observe regions" + "The minimal (non-zero) resolved ts gap for observed regions" ) .unwrap(); pub static ref RTS_RESOLVED_FAIL_ADVANCE_VEC: IntCounterVec = register_int_counter_vec!( @@ -66,22 +66,37 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts", - "The minimal (non-zero) resolved ts for observe regions" + "The minimal (non-zero) resolved ts for observed regions" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_region", + "The region which has minimal safe ts" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts", + "The minimal (non-zero) safe ts for observed regions" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS_GAP: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_gap_millis", + "The minimal (non-zero) safe ts gap for observed regions" ) .unwrap(); pub static ref RTS_ZERO_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_resolved_ts_zero_resolved_ts", - "The number of zero resolved ts for observe regions" + "The number of zero resolved ts for observed regions" ) .unwrap(); pub static ref RTS_LOCK_HEAP_BYTES_GAUGE: IntGauge = register_int_gauge!( "tikv_resolved_ts_lock_heap_bytes", - "Total bytes in memory of resolved-ts observe regions's lock heap" + "Total bytes in memory of resolved-ts observed regions's lock heap" ) .unwrap(); pub static ref RTS_REGION_RESOLVE_STATUS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resolved_ts_region_resolve_status", - "The status of resolved-ts observe regions", + "The status of resolved-ts observed regions", &["type"] ) .unwrap(); diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index b341c546940..4bf72474fcf 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,7 +13,7 @@ use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; pub struct Resolver { region_id: u64, // key -> start_ts - locks_by_key: HashMap, TimeStamp>, + pub(crate) locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, // The timestamps that guarantees no more commit will happen before. diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 44ccf8ce7f7..13ed4965a66 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -64,7 +64,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1651043540619, + "iteration": 1690354913948, "links": [], "panels": [ { @@ -4532,7 +4532,6 @@ }, "yaxes": [ { - "$$hashKey": "object:150", "format": "s", "label": null, "logBase": 2, @@ -4541,7 +4540,6 @@ "show": true }, { - "$$hashKey": "object:151", "format": "short", "label": null, "logBase": 2, @@ -4632,7 +4630,6 @@ }, "yaxes": [ { - "$$hashKey": "object:150", "format": "s", "label": null, "logBase": 2, @@ -4641,7 +4638,6 @@ "show": true }, { - "$$hashKey": "object:151", "format": "short", "label": null, "logBase": 2, @@ -4697,7 +4693,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:80", "alias": "/.*/", "stack": "A" } @@ -4746,7 +4741,6 @@ }, "yaxes": [ { - "$$hashKey": "object:264", "format": "ns", "label": null, "logBase": 1, @@ -4755,7 +4749,6 @@ "show": true }, { - "$$hashKey": "object:265", "format": "short", "label": null, "logBase": 1, @@ -4811,7 +4804,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:62", "alias": "/.*/", "stack": "A" } @@ -4858,7 +4850,6 @@ }, "yaxes": [ { - "$$hashKey": "object:264", "format": "binBps", "label": null, "logBase": 1, @@ -4867,7 +4858,6 @@ "show": true }, { - "$$hashKey": "object:265", "format": "short", "label": null, "logBase": 1, @@ -5653,7 +5643,6 @@ }, "yaxes": [ { - "$$hashKey": "object:69", "format": "ops", "label": null, "logBase": 1, @@ -5662,7 +5651,6 @@ "show": true }, { - "$$hashKey": "object:70", "format": "short", "label": null, "logBase": 1, @@ -5761,7 +5749,6 @@ }, "yaxes": [ { - "$$hashKey": "object:69", "format": "µs", "label": null, "logBase": 1, @@ -5770,7 +5757,6 @@ "show": true }, { - "$$hashKey": "object:70", "format": "short", "label": null, "logBase": 1, @@ -10428,7 +10414,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:113", "alias": "count", "dashLength": 1, "dashes": true, @@ -10439,7 +10424,6 @@ "zindex": -3 }, { - "$$hashKey": "object:114", "alias": "avg", "fill": 7 } @@ -10502,7 +10486,6 @@ }, "yaxes": [ { - "$$hashKey": "object:139", "format": "s", "label": null, "logBase": 1, @@ -10511,7 +10494,6 @@ "show": true }, { - "$$hashKey": "object:140", "format": "short", "label": null, "logBase": 1, @@ -10572,7 +10554,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:217", "alias": "count", "dashLength": 1, "dashes": true, @@ -10583,7 +10564,6 @@ "zindex": -3 }, { - "$$hashKey": "object:218", "alias": "avg", "fill": 7 } @@ -10646,7 +10626,6 @@ }, "yaxes": [ { - "$$hashKey": "object:243", "format": "s", "label": null, "logBase": 1, @@ -10655,7 +10634,6 @@ "show": true }, { - "$$hashKey": "object:244", "format": "short", "label": null, "logBase": 1, @@ -10716,7 +10694,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10727,7 +10704,6 @@ "zindex": -3 }, { - "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -10790,7 +10766,6 @@ }, "yaxes": [ { - "$$hashKey": "object:555", "format": "s", "label": null, "logBase": 1, @@ -10799,7 +10774,6 @@ "show": true }, { - "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -10860,7 +10834,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10871,7 +10844,6 @@ "zindex": -3 }, { - "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -10934,7 +10906,6 @@ }, "yaxes": [ { - "$$hashKey": "object:555", "format": "s", "label": null, "logBase": 1, @@ -10943,7 +10914,6 @@ "show": true }, { - "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -11144,7 +11114,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:733", "alias": "count", "dashLength": 1, "dashes": true, @@ -11155,7 +11124,6 @@ "zindex": -3 }, { - "$$hashKey": "object:734", "alias": "avg", "fill": 7 } @@ -11218,7 +11186,6 @@ }, "yaxes": [ { - "$$hashKey": "object:759", "format": "s", "label": null, "logBase": 1, @@ -11227,7 +11194,6 @@ "show": true }, { - "$$hashKey": "object:760", "format": "short", "label": null, "logBase": 1, @@ -11428,7 +11394,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:841", "alias": "count", "dashLength": 1, "dashes": true, @@ -11439,7 +11404,6 @@ "zindex": -3 }, { - "$$hashKey": "object:842", "alias": "avg", "fill": 7 } @@ -11502,7 +11466,6 @@ }, "yaxes": [ { - "$$hashKey": "object:867", "format": "s", "label": null, "logBase": 1, @@ -11511,7 +11474,6 @@ "show": true }, { - "$$hashKey": "object:868", "format": "short", "label": null, "logBase": 1, @@ -15871,7 +15833,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "tidb-cluster", + "datasource": "${DS_TEST-CLUSTER}", "editable": true, "error": false, "fieldConfig": { @@ -15969,7 +15931,6 @@ }, "yaxes": [ { - "$$hashKey": "object:270", "format": "s", "label": null, "logBase": 1, @@ -15978,7 +15939,6 @@ "show": true }, { - "$$hashKey": "object:271", "format": "short", "label": null, "logBase": 1, @@ -16077,7 +16037,6 @@ }, "yaxes": [ { - "$$hashKey": "object:70", "format": "short", "label": null, "logBase": 1, @@ -16086,7 +16045,6 @@ "show": true }, { - "$$hashKey": "object:71", "format": "short", "label": null, "logBase": 1, @@ -17561,7 +17519,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:521", "alias": "/pending-task/", "transform": "negative-Y", "yaxis": 2 @@ -17632,7 +17589,6 @@ }, "yaxes": [ { - "$$hashKey": "object:86", "decimals": null, "format": "s", "label": null, @@ -17642,7 +17598,6 @@ "show": true }, { - "$$hashKey": "object:87", "format": "short", "label": null, "logBase": 1, @@ -18839,237 +18794,233 @@ "yBucketSize": null }, { - "type": "graph", - "title": "Storage async snapshot duration", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "x": 0, - "y": 35, + "h": 8, "w": 12, - "h": 8 + "x": 0, + "y": 35 }, + "hiddenSeries": false, "id": 20000, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { + "exemplar": true, "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", - "legendFormat": "99%", "interval": "", - "exemplar": true, - "refId": "A", + "intervalFactor": 2, + "legendFormat": "99%", "queryType": "randomWalk", - "intervalFactor": 2 + "refId": "A" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", - "legendFormat": "95%", - "interval": "", "exemplar": true, - "refId": "B", + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", "hide": false, - "intervalFactor": 2 + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%", + "refId": "B" }, { - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", - "legendFormat": "avg", - "interval": "", "exemplar": true, - "refId": "C", + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", "hide": false, - "intervalFactor": 2 + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg", + "refId": "C" } ], - "options": { - "alertThreshold": true + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Storage async snapshot duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "fieldConfig": { - "defaults": {}, - "overrides": [] + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "pluginVersion": "7.5.10", - "renderer": "flot", "yaxes": [ { + "format": "s", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "s", - "$$hashKey": "object:295" + "min": null, + "show": true }, { + "format": "short", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "short", - "$$hashKey": "object:296" + "min": null, + "show": true } ], - "xaxis": { - "show": true, - "mode": "time", - "name": null, - "values": [], - "buckets": null - }, "yaxis": { "align": false, "alignLevel": null - }, - "lines": true, - "fill": 1, - "linewidth": 1, + } + }, + { + "aliasColors": {}, + "bars": false, "dashLength": 10, - "spaceLength": 10, - "pointradius": 2, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async write duration", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "hiddenSeries": false, + "id": 20001, "legend": { - "show": true, - "values": false, - "min": false, - "max": false, + "avg": false, "current": false, + "max": false, + "min": false, + "show": true, "total": false, - "avg": false + "values": false }, + "lines": true, + "linewidth": 1, "nullPointMode": "null", - "tooltip": { - "value_type": "individual", - "shared": true, - "sort": 0 + "options": { + "alertThreshold": true }, - "aliasColors": {}, - "seriesOverrides": [], - "thresholds": [], - "timeRegions": [], - "description": "The storage async snapshot duration", - "datasource": "${DS_TEST-CLUSTER}", - "fillGradient": 0, - "dashes": false, - "hiddenSeries": false, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, "points": false, - "bars": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "stack": false, - "percentage": false, "steppedLine": false, - "timeFrom": null, - "timeShift": null - }, - { - "type": "graph", - "title": "Storage async write duration", - "gridPos": { - "x": 12, - "y": 35, - "w": 12, - "h": 8 - }, - "id": 20001, "targets": [ { + "exemplar": true, "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "legendFormat": "99%", "interval": "", - "exemplar": true, - "refId": "A", - "intervalFactor": 1 + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "legendFormat": "95%", - "interval": "", "exemplar": true, - "refId": "B", + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", "hide": false, - "intervalFactor": 1 + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" }, { - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", - "legendFormat": "avg", - "interval": "", "exemplar": true, - "refId": "C", + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", "hide": false, - "intervalFactor": 1 + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "refId": "C" } ], - "options": { - "alertThreshold": true + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Storage async write duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "pluginVersion": "7.5.10", - "renderer": "flot", "yaxes": [ { + "format": "s", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "s", - "$$hashKey": "object:494" + "min": null, + "show": true }, { + "format": "short", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "short", - "$$hashKey": "object:495" + "min": null, + "show": true } ], - "xaxis": { - "show": true, - "mode": "time", - "name": null, - "values": [], - "buckets": null - }, "yaxis": { "align": false, "alignLevel": null - }, - "lines": true, - "fill": 2, - "linewidth": 1, - "dashLength": 10, - "spaceLength": 10, - "pointradius": 2, - "legend": { - "show": true, - "values": false, - "min": false, - "max": false, - "current": false, - "total": false, - "avg": false - }, - "nullPointMode": "null", - "tooltip": { - "value_type": "individual", - "shared": true, - "sort": 0 - }, - "aliasColors": {}, - "seriesOverrides": [], - "thresholds": [], - "timeRegions": [], - "description": "The storage async write duration", - "fillGradient": 0, - "dashes": false, - "hiddenSeries": false, - "points": false, - "bars": false, - "stack": false, - "percentage": false, - "steppedLine": false, - "timeFrom": null, - "timeShift": null + } } ], "repeat": null, @@ -21370,6 +21321,13 @@ "pointradius": 5, "points": false, "renderer": "flot", + "scopedVars": { + "command": { + "selected": false, + "text": "acquire_pessimistic_lock", + "value": "acquire_pessimistic_lock" + } + }, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -21433,7 +21391,6 @@ }, "yaxes": [ { - "$$hashKey": "object:95", "format": "s", "label": null, "logBase": 1, @@ -21442,7 +21399,6 @@ "show": true }, { - "$$hashKey": "object:96", "format": "short", "label": null, "logBase": 1, @@ -21549,7 +21505,7 @@ "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 26 }, "id": 2755, "panels": [ @@ -22075,7 +22031,7 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 27 }, "id": 2758, "panels": [ @@ -23415,7 +23371,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 28 }, "id": 2759, "panels": [ @@ -24032,7 +23988,7 @@ "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 29 }, "id": 2760, "panels": [ @@ -24444,7 +24400,7 @@ "h": 1, "w": 24, "x": 0, - "y": 38 + "y": 30 }, "id": 2757, "panels": [ @@ -24464,6 +24420,10 @@ "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed to handle coprocessor read requests", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, @@ -24488,6 +24448,7 @@ "values": true }, "links": [], + "reverseYBuckets": false, "targets": [ { "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", @@ -24508,6 +24469,8 @@ "xAxis": { "show": true }, + "xBucketNumber": null, + "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", @@ -24518,13 +24481,6 @@ "splitFactor": null }, "yBucketBound": "upper", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "reverseYBuckets": false, - "xBucketNumber": null, - "xBucketSize": null, "yBucketNumber": null, "yBucketSize": null }, @@ -25248,7 +25204,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 31 }, "id": 3197, "panels": [ @@ -26342,7 +26298,7 @@ "h": 1, "w": 24, "x": 0, - "y": 40 + "y": 32 }, "id": 2761, "panels": [ @@ -26739,7 +26695,7 @@ "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 33 }, "id": 2762, "panels": [ @@ -31479,7 +31435,7 @@ "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 34 }, "id": 12802, "panels": [ @@ -32521,7 +32477,7 @@ "h": 1, "w": 24, "x": 0, - "y": 43 + "y": 35 }, "id": 3301, "panels": [ @@ -35530,7 +35486,7 @@ "h": 1, "w": 24, "x": 0, - "y": 44 + "y": 36 }, "id": 2820, "panels": [ @@ -36478,7 +36434,7 @@ "h": 1, "w": 24, "x": 0, - "y": 45 + "y": 37 }, "id": 8389, "panels": [ @@ -36529,7 +36485,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36637,7 +36593,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36745,7 +36701,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36812,7 +36768,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso and current time", + "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", "editable": true, "error": false, "fieldConfig": { @@ -36852,7 +36808,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36874,7 +36830,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max Resolved TS gap", + "title": "Max gap of resolved ts", "tooltip": { "msResolution": false, "shared": true, @@ -36918,7 +36874,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso of leaders and current time", + "description": "The gap between safe ts and current time", "editable": true, "error": false, "fieldConfig": { @@ -36935,7 +36891,7 @@ "y": 46 }, "hiddenSeries": false, - "id": 23763572077, + "id": 23763573680, "legend": { "alignAsTable": true, "avg": false, @@ -36958,7 +36914,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36969,7 +36925,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -36982,7 +36938,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max Leader Resolved TS gap", + "title": "Max gap of safe ts", "tooltip": { "msResolution": false, "shared": true, @@ -37067,7 +37023,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -37142,7 +37098,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", + "description": "The region that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { @@ -37159,7 +37115,7 @@ "y": 54 }, "hiddenSeries": false, - "id": 23763572079, + "id": 23763573681, "legend": { "alignAsTable": true, "avg": false, @@ -37182,7 +37138,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -37198,7 +37154,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", @@ -37212,7 +37168,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "Min Safe TS Region", "tooltip": { "msResolution": false, "shared": true, @@ -37250,197 +37206,13 @@ "alignLevel": null } }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 62 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 9168, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Check leader duration", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The status of resolved-ts observe regions", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 62 - }, - "hiddenSeries": false, - "id": 8377, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Observe region status", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", + "description": "The gap between resolved tso of leaders and current time", "editable": true, "error": false, "fieldConfig": { @@ -37454,10 +37226,10 @@ "h": 8, "w": 12, "x": 0, - "y": 70 + "y": 62 }, "hiddenSeries": false, - "id": 12308, + "id": 23763572077, "legend": { "alignAsTable": true, "avg": false, @@ -37480,7 +37252,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -37490,26 +37262,26 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "hide": false, + "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", "refId": "A", - "step": 40 + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% CheckLeader request region count", + "title": "Max Leader Resolved TS gap", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -37521,7 +37293,7 @@ }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -37549,7 +37321,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The count of fail to advance resolved-ts", + "description": "The region that its leader has minimal resolved ts.", "editable": true, "error": false, "fieldConfig": { @@ -37563,15 +37335,14 @@ "h": 8, "w": 12, "x": 12, - "y": 70 + "y": 62 }, "hiddenSeries": false, - "id": 9166, + "id": 23763572079, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, "max": true, "min": false, "rightSide": true, @@ -37590,7 +37361,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -37605,11 +37376,13 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{reason}}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", "refId": "A", "step": 10 } @@ -37618,7 +37391,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Fail advance ts count", + "title": "Min Leader Resolved TS Region", "tooltip": { "msResolution": false, "shared": true, @@ -37635,7 +37408,7 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -37643,7 +37416,7 @@ "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -37656,6 +37429,77 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed when handle a check leader request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 9168, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Check leader duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, @@ -37663,7 +37507,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { @@ -37676,11 +37520,11 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 78 + "x": 12, + "y": 70 }, "hiddenSeries": false, - "id": 8379, + "id": 8377, "legend": { "alignAsTable": true, "avg": false, @@ -37703,20 +37547,26 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}", "refId": "A", "step": 10 } @@ -37725,12 +37575,12 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Lock heap size", + "title": "Observe region status", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -37742,7 +37592,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -37769,7 +37619,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", + "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -37782,11 +37632,11 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 78 }, "hiddenSeries": false, - "id": 8383, + "id": 12308, "legend": { "alignAsTable": true, "avg": false, @@ -37809,7 +37659,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -37819,7 +37669,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -37827,21 +37677,13 @@ "metric": "tikv_snapshot_size_bucket", "refId": "A", "step": 40 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% CheckLeader request size", + "title": "99% CheckLeader request region count", "tooltip": { "msResolution": false, "shared": true, @@ -37858,7 +37700,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -37886,7 +37728,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "Total bytes of pending commands in the channel", + "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { @@ -37900,14 +37742,15 @@ "h": 8, "w": 12, "x": 12, - "y": 86 + "y": 78 }, "hiddenSeries": false, - "id": 8381, + "id": 9166, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, "max": true, "min": false, "rightSide": true, @@ -37926,20 +37769,26 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{reason}}", "refId": "A", "step": 10 } @@ -37948,12 +37797,12 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Pending command size", + "title": "Fail advance ts count", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -37965,7 +37814,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -37985,44 +37834,41 @@ "align": false, "alignLevel": null } - } - ], - "title": "Resolved-TS", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 46 - }, - "id": 2763, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fill": 1, + "decimals": 1, + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 505 + "y": 86 }, - "id": 2696, + "hiddenSeries": false, + "id": 8379, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -38032,7 +37878,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -38042,19 +37892,21 @@ "steppedLine": false, "targets": [ { - "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Allocator Stats", + "title": "Lock heap size", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -38069,11 +37921,11 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -38089,47 +37941,31 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Memory", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 47 - }, - "id": 3922, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, - "x": 0, - "y": 65 + "x": 12, + "y": 86 }, "hiddenSeries": false, - "id": 3924, + "id": 8383, "legend": { "alignAsTable": true, "avg": false, @@ -38139,7 +37975,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -38148,62 +37983,44 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-auto-throttle/", - "fill": 5, - "fillGradient": 2, - "linewidth": 0 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", "refId": "A", - "step": 4 + "step": 40 }, { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "backup-io-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 - }, - { - "exemplar": true, - "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "backup-auto-throttle-{{instance}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Backup CPU Utilization", + "title": "99% CheckLeader request size", "tooltip": { "msResolution": false, "shared": true, @@ -38220,7 +38037,369 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "Total bytes of pending commands in the channel", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 94 + }, + "hiddenSeries": false, + "id": 8381, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pending command size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Resolved-TS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 2763, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 505 + }, + "id": 2696, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Allocator Stats", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Memory", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 3922, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 65 + }, + "hiddenSeries": false, + "id": 3924, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/backup-auto-throttle/", + "fill": 5, + "fillGradient": 2, + "linewidth": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "backup-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "backup-io-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "B", + "step": 4 + }, + { + "exemplar": true, + "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "hide": false, + "interval": "", + "legendFormat": "backup-auto-throttle-{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Backup CPU Utilization", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -40973,11 +41152,11 @@ { "exemplar": true, "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": true, "interval": "", "legendFormat": "{{instance}}", "queryType": "randomWalk", - "refId": "A", - "hide": true + "refId": "A" }, { "exemplar": true, @@ -41040,596 +41219,301 @@ "h": 1, "w": 24, "x": 0, - "y": 54 + "y": 40 }, - "id": 13016, + "id": 4466, "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Disabled", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Enabled", - "to": "", - "type": 1, - "value": "1" - } - ], - "noValue": "Disabled", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "dark-green", - "value": 1 - } - ] - } - }, - "overrides": [] - }, + "description": "Total number of encryption data keys in use", + "fill": 1, "gridPos": { - "h": 4, - "w": 5, + "h": 8, + "w": 12, "x": 0, - "y": 55 + "y": 58 }, - "id": 14361, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "id": 4464, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", + "expr": "tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "refId": "A" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Endpoint Status", - "transformations": [], - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "The average flush size of last 30mins.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [] + "title": "Encryption data keys", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "gridPos": { - "h": 8, - "w": 8, - "x": 5, - "y": 55 + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "id": 14507, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Average Flush Size ", - "type": "stat" + "yaxis": { + "align": false, + "alignLevel": null + } }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The current total flushed file number of this run.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, + "description": "Number of files being encrypted", + "fill": 1, "gridPos": { "h": 8, - "w": 8, - "x": 13, - "y": 55 + "w": 12, + "x": 12, + "y": 58 }, - "id": 14363, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "id": 4554, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", + "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "refId": "A" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Flushed Files (Last 30m) Per Host", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] + "title": "Encrypted files", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 55 + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "id": 14508, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Flush Times (Last 30m)", - "type": "stat" + "yaxis": { + "align": false, + "alignLevel": null + } }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "description": "Flag to indicate if encryption is initialized", + "fill": 1, "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 57 + "h": 8, + "w": 12, + "x": 0, + "y": 66 }, - "id": 14362, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "id": 4555, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Total Flushed Size (Last 30m)", - "type": "stat" + "title": "Encryption initialized", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Running", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Paused", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 3, - "text": "Error", - "to": "", - "type": 1, - "value": "2" - } - ], - "noValue": "Disabled", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1 - }, - { - "color": "dark-red", - "value": 2 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 59 - }, - "id": 14907, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Task Status", - "transformations": [], - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "dark-blue", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 2, - "y": 59 - }, - "id": 15361, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "name" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "tidb_log_backup_advancer_owner > 0", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Advancer Owner", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 60 - }, - "id": 14911, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Flush Files (Last 30m)", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, + "description": "Total size of encryption meta files", + "fill": 1, "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 63 + "h": 8, + "w": 12, + "x": 12, + "y": 66 }, - "hiddenSeries": false, - "id": 13262, + "id": 4556, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "hideEmpty": true, "max": true, - "min": false, - "rightSide": false, + "min": true, + "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -41637,12 +41521,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -41651,25 +41531,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{name}}-{{instance}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "Encryption meta files size", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -41684,16 +41558,14 @@ }, "yaxes": [ { - "$$hashKey": "object:646", - "format": "percentunit", + "format": "decbytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:647", "format": "short", "label": null, "logBase": 1, @@ -41714,26 +41586,21 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 63 + "h": 8, + "w": 12, + "x": 0, + "y": 74 }, - "hiddenSeries": false, - "id": 12843, + "id": 4557, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, "total": false, "values": true @@ -41742,11 +41609,7 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41756,21 +41619,25 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", + "intervalFactor": 1, + "legendFormat": "encrypt-{{req}}", "refId": "A" + }, + { + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "decrypt-{{req}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Handle Event Rate", + "title": "Encrypt/decrypt data nanos", "tooltip": { "shared": true, "sort": 0, @@ -41786,16 +41653,14 @@ }, "yaxes": [ { - "$$hashKey": "object:563", - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:564", "format": "short", "label": null, "logBase": 1, @@ -41815,39 +41680,31 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The data rate of initial scanning emitting events.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "Writing or reading file duration (second)", "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, + "h": 8, + "w": 12, "x": 12, - "y": 63 + "y": 74 }, - "hiddenSeries": false, - "id": 14135, + "id": 4559, "legend": { "alignAsTable": true, - "avg": true, - "current": false, + "avg": false, + "current": true, "max": true, "min": false, + "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "links": [], + "nullPointMode": "null as zero", "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41857,20 +41714,36 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", + "expr": "histogram_quantile(1, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "format": "time_series", + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "queryType": "randomWalk", + "legendFormat": "max-{{type}}-{{operation}}", "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%-{{type}}-{{operation}}", + "refId": "B" + }, + { + "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-{{type}}-{{operation}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scan Generate Event Throughput", + "title": "Read/write encryption meta duration", "tooltip": { "shared": true, "sort": 0, @@ -41886,8 +41759,8 @@ }, "yaxes": [ { - "$$hashKey": "object:136", - "format": "binBps", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -41895,12 +41768,11 @@ "show": true }, { - "$$hashKey": "object:137", "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -41908,1381 +41780,1129 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Encryption", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 13016, + "panels": [ { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 600000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "name": "Checkpoint Lag Too Huge", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "fieldConfig": { "defaults": { - "unit": "ms" + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "from": "", + "id": 1, + "text": "Disabled", + "to": "", + "type": 1, + "value": "0" + }, + { + "from": "", + "id": 2, + "text": "Enabled", + "to": "", + "type": 1, + "value": "1" + } + ], + "noValue": "Disabled", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(0, 0, 0, 0.2)", + "value": null + }, + { + "color": "dark-red", + "value": 0 + }, + { + "color": "dark-green", + "value": 1 + } + ] + } }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 63 - }, - "hiddenSeries": false, - "id": 14774, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "h": 4, + "w": 5, + "x": 0, + "y": 55 }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 14361, "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", - "hide": true, - "interval": "", - "legendFormat": "Current Time", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 600000, - "visible": true - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Abnormal Checkpoint TS Lag", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:228", - "format": "ms", - "label": null, - "logBase": 1, - "max": "3000000", - "min": "0", - "show": true + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - { - "$$hashKey": "object:229", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The estimated memory usage by the streaming backup module.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 73 - }, - "hiddenSeries": false, - "id": 13100, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", - "format": "time_series", - "instant": false, + "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Memory Of Events", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Endpoint Status", + "transformations": [], + "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The average flush size of last 30mins.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 73 - }, - "hiddenSeries": false, - "id": 14630, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true + "h": 8, + "w": 8, + "x": 5, + "y": 55 }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 14507, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", "hide": false, + "instant": true, "interval": "", - "legendFormat": "total", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Observed Region Count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:136", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:137", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Average Flush Size ", + "type": "stat" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.\n**They are retryable, don't worry.**", + "description": "The current total flushed file number of this run.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 73 - }, - "hiddenSeries": false, - "id": 13101, - "legend": { - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + "h": 8, + "w": 8, + "x": 13, + "y": 55 }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 14363, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", + "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", + "instant": true, + "interval": "", + "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Flushed Files (Last 30m) Per Host", + "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 73 - }, - "hiddenSeries": false, - "id": 14910, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "h": 2, + "w": 3, + "x": 21, + "y": 55 }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 14508, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:240", - "alias": "Current Time", - "dashes": true, - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", + "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", "hide": false, + "instant": true, "interval": "", - "legendFormat": "Current Time", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Checkpoint TS of Tasks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:169", - "format": "dateTimeAsIsoNoDateIfToday", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:170", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Flush Times (Last 30m)", + "type": "stat" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 78 - }, - "hiddenSeries": false, - "id": 14908, - "legend": { - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + "h": 3, + "w": 3, + "x": 21, + "y": 57 }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 14362, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", - "format": "time_series", + "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", "hide": false, - "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", + "instant": true, + "interval": "", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [ - { - "$$hashKey": "object:3232", - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "yaxis": "left" - } - ], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Fatal Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Total Flushed Size (Last 30m)", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateBlues", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of flushing a batch of file.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "from": "", + "id": 1, + "text": "Running", + "to": "", + "type": 1, + "value": "0" + }, + { + "from": "", + "id": 2, + "text": "Paused", + "to": "", + "type": 1, + "value": "1" + }, + { + "from": "", + "id": 3, + "text": "Error", + "to": "", + "type": 1, + "value": "2" + } + ], + "noValue": "Disabled", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(0, 0, 0, 0.2)", + "value": null + }, + { + "color": "dark-green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "dark-red", + "value": 2 + } + ] + } + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, + "h": 4, + "w": 2, "x": 0, - "y": 83 + "y": 59 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 14078, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 14907, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Flush Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Task Status", + "transformations": [], + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateReds", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of scanning the initial data from local DB and transform them into apply events. \n", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + }, + "unit": "none" + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 83 + "h": 4, + "w": 3, + "x": 2, + "y": 59 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 14136, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 15361, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "name" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "tidb_log_backup_advancer_owner > 0", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", "refId": "A" } ], - "title": "Initial scanning duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Advancer Owner", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of converting a raft request into a apply event. \n*This duration is for consuming a batch of events.*", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 83 + "h": 3, + "w": 3, + "x": 21, + "y": 60 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13934, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 14911, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", + "hide": false, + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "legendFormat": "{{ instance }}", + "refId": "B" } ], - "title": "Convert Raft Event duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Flush Files (Last 30m)", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of waiting the mutex of the controller. \n*This duration is for consuming a batch of events.*", + "decimals": 1, + "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 18, - "y": 83 + "x": 0, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 12840, + "hiddenSeries": false, + "id": 13262, "legend": { "alignAsTable": true, - "avg": false, + "avg": true, "current": true, + "hideEmpty": true, "max": true, "min": false, - "rightSide": true, + "rightSide": false, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "legendFormat": "{{name}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 } ], - "title": "Wait for Lock Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateCividis", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of KV-modify of each raft command observed.", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 0, - "y": 90 + "x": 6, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 15059, + "hiddenSeries": false, + "id": 12843, "legend": { "alignAsTable": true, - "avg": false, + "avg": true, "current": true, - "max": true, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Command Batch Size", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Handle Event Rate", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of saving an event into temporary file. \n*This duration is for consuming a batch of events.*", + "description": "The data rate of initial scanning emitting events.", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 6, - "y": 90 + "x": 12, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 12841, + "hiddenSeries": false, + "id": 14135, "legend": { "alignAsTable": true, - "avg": false, - "current": true, + "avg": true, + "current": false, "max": true, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Save to Temp File Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Initial Scan Generate Event Throughput", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 600000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Checkpoint Lag Too Huge", + "noDataState": "no_data", + "notifications": [] }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. \n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "ms" + }, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 12, - "y": 90 + "x": 18, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13552, + "hiddenSeries": false, + "id": 14774, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ task }}", "refId": "A" + }, + { + "exemplar": true, + "expr": "time() * 1000", + "hide": true, + "interval": "", + "legendFormat": "Current Time", + "refId": "B" } ], - "title": "Write to Temp File Duration", + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 600000, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Abnormal Checkpoint TS Lag", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": "3000000", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. \n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The estimated memory usage by the streaming backup module.", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 18, - "y": 90 + "x": 0, + "y": 73 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13551, + "hiddenSeries": false, + "id": 13100, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", + "format": "time_series", "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "System Write Call Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Of Events", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -43290,7 +42910,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal message type count.", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -43298,21 +42918,21 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 97 + "h": 10, + "w": 6, + "x": 6, + "y": 73 }, "hiddenSeries": false, - "id": 14914, + "id": 14630, "legend": { "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "show": false, + "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -43325,25 +42945,39 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", + "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", "interval": "", - "legendFormat": "{{ message }}", + "intervalFactor": 2, + "legendFormat": "{{instance}}", "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "hide": false, + "interval": "", + "legendFormat": "total", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Type", + "title": "Observed Region Count", "tooltip": { "shared": true, "sort": 0, @@ -43359,17 +42993,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "ops", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -43383,43 +43015,39 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, - "bars": false, + "aliasColors": {}, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "The errors met when backing up.\n**They are retryable, don't worry.**", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 5, "w": 6, "x": 12, - "y": 97 + "y": 73 }, "hiddenSeries": false, - "id": 14912, + "id": 13101, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, + "hideZero": true, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true @@ -43431,23 +43059,35 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, - "steppedLine": false, + "stack": true, + "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", - "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{type}}@{{instance}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", + "hide": true, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Handling Duration (P99)", + "title": "Errors", "tooltip": { "shared": true, "sort": 0, @@ -43463,8 +43103,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -43472,13 +43111,12 @@ "show": true }, { - "$$hashKey": "object:104", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -43492,7 +43130,6 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -43500,13 +43137,13 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 10, "w": 6, "x": 18, - "y": 97 + "y": 73 }, "hiddenSeries": false, - "id": 14913, + "id": 14910, "legend": { "avg": false, "current": false, @@ -43527,25 +43164,39 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Current Time", + "dashes": true, + "fill": 0 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "legendFormat": "{{ task }}", "refId": "A" + }, + { + "exemplar": true, + "expr": "time() * 1000", + "hide": false, + "interval": "", + "legendFormat": "Current Time", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Handling Duration (P90)", + "title": "Checkpoint TS of Tasks", "tooltip": { "shared": true, "sort": 0, @@ -43561,16 +43212,14 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "dateTimeAsIsoNoDateIfToday", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", "format": "short", "label": null, "logBase": 1, @@ -43586,11 +43235,11 @@ }, { "aliasColors": {}, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", + "description": "The errors met when backing up.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -43598,25 +43247,26 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 0, - "y": 103 + "x": 12, + "y": 78 }, "hiddenSeries": false, - "id": 14271, + "id": 14908, "legend": { "avg": false, "current": false, + "hideZero": true, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true @@ -43628,23 +43278,44 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, - "steppedLine": false, + "stack": true, + "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", - "interval": "", - "legendFormat": "{{ cf }}", - "queryType": "randomWalk", + "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{type}}@{{instance}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": true, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Throughput ", + "title": "Fatal Errors", "tooltip": { "shared": true, "sort": 0, @@ -43660,329 +43331,524 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "binBps", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of flushing a batch of file.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 14078, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Flush Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateReds", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of scanning the initial data from local DB and transform them into apply events. \n", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 14136, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Initial scanning duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateGreens", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of converting a raft request into a apply event. \n*This duration is for consuming a batch of events.*", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 13934, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Convert Raft Event duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateGreens", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Misc statistics of RocksDB during initial scanning.", + "description": "The duration of waiting the mutex of the controller. \n*This duration is for consuming a batch of events.*", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 6, - "y": 103 + "x": 18, + "y": 83 }, - "hiddenSeries": false, - "id": 14270, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 12840, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ cf }}/{{ op }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Initial Scan RocksDB Operation ", + "title": "Wait for Lock Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:103", - "format": "ops", - "label": null, - "logBase": 2, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:104", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": { - "leader-changed": "blue", - "region-changed": "purple" + "cards": { + "cardPadding": 0, + "cardRound": 0 }, - "bars": true, - "dashLength": 10, - "dashes": false, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateCividis", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of triggering initial scanning.", + "description": "The number of KV-modify of each raft command observed.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 12, - "y": 103 + "x": 0, + "y": 90 }, - "hiddenSeries": false, - "id": 14915, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15059, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": false + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Initial Scanning Trigger Reason", + "title": "Command Batch Size", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "short", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:2608", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2609", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": { - "del": "dark-red", - "put": "green" + "cards": { + "cardPadding": 0, + "cardRound": 0 }, - "bars": false, - "dashLength": 10, - "dashes": false, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The total cost of saving an event into temporary file. \n*This duration is for consuming a batch of events.*", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 18, - "y": 103 + "x": 6, + "y": 90 }, - "hiddenSeries": false, - "id": 15176, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 12841, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ type }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Region Checkpoint Key Putting", + "title": "Save to Temp File Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:2608", - "format": "cps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2609", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cards": { @@ -44000,7 +43866,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. \n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { "defaults": {}, "overrides": [] @@ -44008,13 +43874,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 0, - "y": 109 + "x": 12, + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 15544, + "id": 13552, "legend": { "alignAsTable": true, "avg": false, @@ -44033,7 +43899,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -44042,7 +43908,7 @@ "refId": "A" } ], - "title": "Request Checkpoint Batch Size", + "title": "Write to Temp File Duration", "tooltip": { "show": true, "showHistogram": true @@ -44056,7 +43922,7 @@ "xBucketSize": null, "yAxis": { "decimals": 1, - "format": "none", + "format": "s", "logBase": 1, "max": null, "min": null, @@ -44083,7 +43949,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. \n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { "defaults": {}, "overrides": [] @@ -44091,13 +43957,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 6, - "y": 109 + "x": 18, + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 15716, + "id": 13551, "legend": { "alignAsTable": true, "avg": false, @@ -44109,74 +43975,168 @@ "sort": "current", "sortDesc": true, "total": false, - "values": true + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "System Write Call Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The internal message type count.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 97 + }, + "hiddenSeries": false, + "id": 14914, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ message }}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Tick Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Internal Message Type", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 2, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { - "epoch-not-match": "purple", - "not-leader": "blue", "watch_task": "orange" }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of advancer failed to be advanced.", + "description": "The internal handling message duration.", "fieldConfig": { "defaults": { - "unit": "none" + "unit": "s" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 12, - "y": 109 + "y": 97 }, "hiddenSeries": false, - "id": 23763572666, + "id": 14912, "legend": { "alignAsTable": false, "avg": false, @@ -44188,7 +44148,7 @@ "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -44201,33 +44161,23 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", - "hide": false, + "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ reason }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": false, - "interval": "", - "legendFormat": "", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Region Checkpoint Failure Reason", + "title": "Internal Message Handling Duration (P99)", "tooltip": { "shared": true, "sort": 0, @@ -44243,8 +44193,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -44252,13 +44201,12 @@ "show": true }, { - "$$hashKey": "object:104", - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -44267,44 +44215,36 @@ } }, { - "aliasColors": { - "fail": "red", - "success": "green", - "watch_task": "orange" - }, - "bars": true, + "aliasColors": {}, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The result of getting region checkpoints.", + "description": "The internal handling message duration.", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 18, - "y": 109 + "y": 97 }, "hiddenSeries": false, - "id": 23763572665, + "id": 14913, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -44315,42 +44255,25 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:834", - "alias": "fail", - "transform": "negative-Y", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", - "hide": false, + "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ result }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": false, - "interval": "", - "legendFormat": "", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Request Result", + "title": "Internal Message Handling Duration (P90)", "tooltip": { "shared": true, "sort": 0, @@ -44366,8 +44289,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -44375,8 +44297,7 @@ "show": true }, { - "$$hashKey": "object:104", - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44390,37 +44311,32 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 0, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 15359, + "id": 14271, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -44436,32 +44352,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1017", - "alias": "consistency-check", - "yaxis": 1 - }, - { - "$$hashKey": "object:1018", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1019", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", "interval": "", - "legendFormat": "{{ step }}", + "legendFormat": "{{ cf }}", "queryType": "randomWalk", "refId": "A" } @@ -44470,7 +44370,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Tick Duration (P99)", + "title": "Initial Scan RocksDB Throughput ", "tooltip": { "shared": true, "sort": 0, @@ -44486,17 +44386,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "binBps", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44510,37 +44408,32 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "Misc statistics of RocksDB during initial scanning.", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 6, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 15360, + "id": 14270, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -44556,32 +44449,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 1 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", "interval": "", - "legendFormat": "{{ step }}", + "legendFormat": "{{ cf }}/{{ op }}", "queryType": "randomWalk", "refId": "A" } @@ -44590,7 +44467,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Tick Duration (P90)", + "title": "Initial Scan RocksDB Operation ", "tooltip": { "shared": true, "sort": 0, @@ -44606,17 +44483,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "ops", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44631,77 +44506,58 @@ }, { "aliasColors": { - "watch_task": "orange" + "leader-changed": "blue", + "region-changed": "purple" }, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The frequent of getting region level checkpoint.", + "description": "The reason of triggering initial scanning.", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 12, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 23763572733, + "id": 14915, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true + "alertThreshold": false }, "percentage": false, "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", "interval": "", - "legendFormat": "{{ step }} {{ instance }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" } @@ -44710,7 +44566,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Get Region Operation Count", + "title": "Initial Scanning Trigger Reason", "tooltip": { "shared": true, "sort": 0, @@ -44726,17 +44582,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44751,41 +44605,38 @@ }, { "aliasColors": { - "watch_task": "orange" + "del": "dark-red", + "put": "green" }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The variant of checkpoint group.", + "description": "", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 18, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 23763572734, + "id": 15176, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -44796,33 +44647,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", + "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ step }} {{ instance }}", + "legendFormat": "{{ type }}", "queryType": "randomWalk", "refId": "A" } @@ -44831,7 +44665,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Try Advance Trigger Time", + "title": "Region Checkpoint Key Putting", "tooltip": { "shared": true, "sort": 0, @@ -44847,17 +44681,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "cps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44869,74 +44701,251 @@ "align": false, "alignLevel": null } - } - ], - "title": "Backup Log", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 48 - }, - "id": 4466, - "panels": [ + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 109 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15544, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Checkpoint Batch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 109 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15716, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Tick Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "epoch-not-match": "purple", + "not-leader": "blue", + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total number of encryption data keys in use", + "description": "The reason of advancer failed to be advanced.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 58 + "h": 7, + "w": 6, + "x": 12, + "y": 109 }, - "id": 4464, + "hiddenSeries": false, + "id": 23763572666, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption data keys", + "title": "Region Checkpoint Failure Reason", "tooltip": { "shared": true, "sort": 0, @@ -44952,17 +44961,15 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "decimals": 0, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44976,57 +44983,89 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "fail": "red", + "success": "green", + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Number of files being encrypted", + "description": "The result of getting region checkpoints.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 58 + "h": 7, + "w": 6, + "x": 18, + "y": 109 }, - "id": 4554, + "hiddenSeries": false, + "id": 23763572665, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "fail", + "transform": "negative-Y", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ result }}", + "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encrypted files", + "title": "Request Result", "tooltip": { "shared": true, "sort": 0, @@ -45042,15 +45081,15 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -45064,49 +45103,76 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Flag to indicate if encryption is initialized", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 66 + "y": 116 }, - "id": 4555, + "hiddenSeries": false, + "id": 15359, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, + "current": false, + "hideEmpty": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "consistency-check", + "yaxis": 1 + }, + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", "refId": "A" } ], @@ -45114,7 +45180,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption initialized", + "title": "Tick Duration (P99)", "tooltip": { "shared": true, "sort": 0, @@ -45130,17 +45196,15 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "decimals": 0, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45154,49 +45218,76 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total size of encryption meta files", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 66 + "h": 7, + "w": 6, + "x": 6, + "y": 116 }, - "id": 4556, + "hiddenSeries": false, + "id": 15360, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 1 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{name}}-{{instance}}", + "exemplar": true, + "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", "refId": "A" } ], @@ -45204,7 +45295,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption meta files size", + "title": "Tick Duration (P90)", "tooltip": { "shared": true, "sort": 0, @@ -45220,15 +45311,15 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45242,64 +45333,84 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The frequent of getting region level checkpoint.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 74 + "h": 7, + "w": 6, + "x": 12, + "y": 116 }, - "id": 4557, + "hiddenSeries": false, + "id": 23763572733, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "encrypt-{{req}}", + "exemplar": true, + "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "decrypt-{{req}}", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encrypt/decrypt data nanos", + "title": "Get Region Operation Count", "tooltip": { "shared": true, "sort": 0, @@ -45315,15 +45426,15 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45337,75 +45448,85 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Writing or reading file duration (second)", + "description": "The variant of checkpoint group.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 74 + "h": 7, + "w": 6, + "x": 18, + "y": 116 }, - "id": 4559, + "hiddenSeries": false, + "id": 23763572734, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", - "format": "time_series", - "instant": false, + "exemplar": true, + "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", "interval": "", "intervalFactor": 2, - "legendFormat": "max-{{type}}-{{operation}}", + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "95%-{{type}}-{{operation}}", - "refId": "B" - }, - { - "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg-{{type}}-{{operation}}", - "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Read/write encryption meta duration", + "title": "Try Advance Trigger Time", "tooltip": { "shared": true, "sort": 0, @@ -45421,16 +45542,15 @@ }, "yaxes": [ { - "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45444,7 +45564,7 @@ } } ], - "title": "Encryption", + "title": "Backup Log", "type": "row" } ],