diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 050a67276220..6192b4f3a5e6 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -19400,6 +19400,145 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The time consumed on reading when executing commit command", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "hiddenSeries": false, + "id": 23763572710, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "metric": "", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%", + "metric": "", + "refId": "B", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_scheduler_processing_read_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_processing_read_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg", + "metric": "", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scheduler command read duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:95", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:96", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "cards": { "cardPadding": null, @@ -19422,8 +19561,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 60 + "x": 12, + "y": 63 }, "heatmap": {}, "hideZeroBuckets": true, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 802c35af0205..9e778afe0648 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -547,7 +547,7 @@ impl Storage { key: Key, start_ts: TimeStamp, ) -> impl Future, KvGetStatistics)>> { - let stage_begin_ts = Instant::now_coarse(); + let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); let priority_tag = get_priority_tag(priority); @@ -563,7 +563,7 @@ impl Storage { let res = self.read_pool.spawn_handle( async move { - let stage_scheduled_ts = Instant::now_coarse(); + let stage_scheduled_ts = Instant::now(); tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -580,7 +580,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [key.as_encoded()])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); // The bypass_locks and access_locks set will be checked at most once. // `TsSet::vec` is more efficient here. @@ -598,7 +598,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let buckets = snapshot.ext().get_buckets(); let mut statistics = Statistics::default(); @@ -656,7 +656,7 @@ impl Storage { .inc_by(quota_delay.as_micros() as u64); } - let stage_finished_ts = Instant::now_coarse(); + let stage_finished_ts = Instant::now(); let schedule_wait_time = stage_scheduled_ts.saturating_duration_since(stage_begin_ts); let snapshot_wait_time = @@ -724,7 +724,7 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(requests.len() as f64); - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let read_id = Some(ThreadReadId::new()); let mut statistics = Statistics::default(); let mut req_snaps = vec![]; @@ -871,7 +871,7 @@ impl Storage { keys: Vec, start_ts: TimeStamp, ) -> impl Future>, KvGetStatistics)>> { - let stage_begin_ts = Instant::now_coarse(); + let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); let priority_tag = get_priority_tag(priority); @@ -888,7 +888,7 @@ impl Storage { let mut sample = quota_limiter.new_sample(); let res = self.read_pool.spawn_handle( async move { - let stage_scheduled_ts = Instant::now_coarse(); + let stage_scheduled_ts = Instant::now(); let mut key_ranges = vec![]; for key in &keys { key_ranges.push(build_key_range(key.as_encoded(), key.as_encoded(), false)); @@ -912,7 +912,7 @@ impl Storage { keys.iter().map(Key::as_encoded), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -928,7 +928,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let mut statistics = Vec::with_capacity(keys.len()); @@ -999,7 +999,7 @@ impl Storage { .inc_by(quota_delay.as_micros() as u64); } - let stage_finished_ts = Instant::now_coarse(); + let stage_finished_ts = Instant::now(); let schedule_wait_time = stage_scheduled_ts.saturating_duration_since(stage_begin_ts); let snapshot_wait_time = @@ -1102,7 +1102,7 @@ impl Storage { if reverse_scan { std::mem::swap(&mut start_key, &mut end_key); } - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1155,7 +1155,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); @@ -1266,7 +1266,7 @@ impl Storage { // Do not check_api_version in scan_lock, to be compatible with TiDB gc-worker, // which resolves locks on regions, and boundary of regions will be out of range of TiDB keys. - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); concurrency_manager.update_max_ts(max_ts); let begin_instant = Instant::now(); @@ -1305,7 +1305,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut statistics = Statistics::default(); let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); @@ -1481,7 +1481,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1492,7 +1492,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf(&cf, api_version)?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. @@ -1577,7 +1577,7 @@ impl Storage { .map_err(Error::from)?; } - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let read_id = Some(ThreadReadId::new()); let mut snaps = vec![]; for (mut req, id) in gets.into_iter().zip(ids) { @@ -1604,7 +1604,7 @@ impl Storage { snaps.push((id, key, ctx, req, snap)); } Self::with_tls_engine(|engine| engine.release_snapshot()); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); for (id, key, ctx, mut req, snap) in snaps { let cf = req.take_cf(); match snap.await { @@ -1684,7 +1684,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, &keys)?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1694,7 +1694,7 @@ impl Storage { let buckets = snapshot.ext().get_buckets(); let store = RawStore::new(snapshot, api_version); { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let cf = Self::rawkv_cf(&cf, api_version)?; // no scan_count for this kind of op. @@ -2020,7 +2020,7 @@ impl Storage { [(Some(&start_key), end_key.as_ref())], )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2031,7 +2031,7 @@ impl Storage { let cf = Self::rawkv_cf(&cf, api_version)?; { let store = RawStore::new(snapshot, api_version); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let start_key = F::encode_raw_key_owned(start_key, None); let end_key = end_key.map(|k| F::encode_raw_key_owned(k, None)); @@ -2155,7 +2155,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2297,7 +2297,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2308,7 +2308,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf(&cf, api_version)?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. @@ -2462,7 +2462,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2473,7 +2473,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf("", api_version)?; - let begin_instant = tikv_util::time::Instant::now_coarse(); + let begin_instant = tikv_util::time::Instant::now(); let mut stats = Vec::with_capacity(ranges.len()); let ret = store .raw_checksum_ranges(cf, &ranges, &mut stats) diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index d83d8fe6f46b..12ff44bbd616 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -13,7 +13,6 @@ use prometheus::local::*; use raftstore::store::WriteStats; use tikv_util::{ sys::SysQuota, - time::Duration, yatp_pool::{FuturePool, PoolTicker, YatpPoolBuilder}, }; @@ -24,8 +23,6 @@ use crate::storage::{ pub struct SchedLocalMetrics { local_scan_details: HashMap<&'static str, Statistics>, - processing_read_duration: LocalHistogramVec, - processing_write_duration: LocalHistogramVec, command_keyread_histogram_vec: LocalHistogramVec, local_write_stats: WriteStats, } @@ -34,8 +31,6 @@ thread_local! { static TLS_SCHED_METRICS: RefCell = RefCell::new( SchedLocalMetrics { local_scan_details: HashMap::default(), - processing_read_duration: SCHED_PROCESSING_READ_HISTOGRAM_VEC.local(), - processing_write_duration: SCHED_PROCESSING_WRITE_HISTOGRAM_VEC.local(), command_keyread_histogram_vec: KV_COMMAND_KEYREAD_HISTOGRAM_VEC.local(), local_write_stats:WriteStats::default(), } @@ -112,8 +107,6 @@ pub fn tls_flush(reporter: &R) { } } } - m.processing_read_duration.flush(); - m.processing_write_duration.flush(); m.command_keyread_histogram_vec.flush(); // Report PD metrics @@ -132,15 +125,6 @@ pub fn tls_collect_query(region_id: u64, kind: QueryKind) { }); } -pub fn tls_collect_read_duration(cmd: &str, duration: Duration) { - TLS_SCHED_METRICS.with(|m| { - m.borrow_mut() - .processing_read_duration - .with_label_values(&[cmd]) - .observe(tikv_util::time::duration_to_sec(duration)) - }); -} - pub fn tls_collect_keyread_histogram_vec(cmd: &str, count: f64) { TLS_SCHED_METRICS.with(|m| { m.borrow_mut() diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3460a1de5fd8..283787e9ba19 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -64,9 +64,7 @@ use crate::{ commands::{Command, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo}, flow_controller::FlowController, latch::{Latches, Lock}, - sched_pool::{ - tls_collect_query, tls_collect_read_duration, tls_collect_scan_details, SchedPool, - }, + sched_pool::{tls_collect_query, tls_collect_scan_details, SchedPool}, Error, ProcessResult, }, types::StorageCallback, @@ -733,8 +731,6 @@ impl Scheduler { tag, ts ); - - tls_collect_read_duration(tag.get_str(), elapsed); } .in_resource_metering_tag(resource_tag) .await; @@ -748,10 +744,14 @@ impl Scheduler { let tag = task.cmd.tag(); + let begin_instant = Instant::now(); let pr = task .cmd .process_read(snapshot, statistics) .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }); + SCHED_PROCESSING_READ_HISTOGRAM_STATIC + .get(tag) + .observe(begin_instant.saturating_elapsed_secs()); self.on_read_finished(task.cid, pr, tag); } @@ -782,10 +782,15 @@ impl Scheduler { statistics, async_apply_prewrite: self.inner.enable_async_apply_prewrite, }; - - task.cmd + let begin_instant = Instant::now(); + let res = task + .cmd .process_write(snapshot, context) - .map_err(StorageError::from) + .map_err(StorageError::from); + SCHED_PROCESSING_READ_HISTOGRAM_STATIC + .get(tag) + .observe(begin_instant.saturating_elapsed_secs()); + res }; if write_result.is_ok() {