From 5a542584e0a0f43940251008c162d705fc592fda Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 18 May 2022 16:36:24 +0800 Subject: [PATCH 1/9] Report the file usage of BlobStore Signed-off-by: JaySon-Huang --- dbms/src/Interpreters/AsynchronousMetrics.cpp | 33 +++++++++++++ dbms/src/Server/Server.cpp | 6 +++ dbms/src/Storages/DeltaMerge/StoragePool.h | 2 + dbms/src/Storages/Page/FileUsage.h | 30 ++++++++++++ dbms/src/Storages/Page/PageStorage.cpp | 18 ++++++++ dbms/src/Storages/Page/PageStorage.h | 9 ++++ dbms/src/Storages/Page/Snapshot.h | 2 +- dbms/src/Storages/Page/V3/BlobStore.cpp | 46 ++++++++++++++++--- dbms/src/Storages/Page/V3/BlobStore.h | 14 ++++-- dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 5 ++ dbms/src/Storages/Page/V3/PageStorageImpl.h | 2 + dbms/src/Storages/Transaction/KVStore.h | 5 ++ .../Storages/Transaction/RegionPersister.cpp | 6 +++ .../Storages/Transaction/RegionPersister.h | 3 ++ 14 files changed, 171 insertions(+), 10 deletions(-) create mode 100644 dbms/src/Storages/Page/FileUsage.h diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index e96d57e0370..c94f87bacf4 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -21,8 +21,12 @@ #include #include #include +#include #include +#include #include +#include +#include #include #include @@ -147,6 +151,7 @@ void AsynchronousMetrics::update() set("Uptime", context.getUptimeSeconds()); { + // Get the snapshot status from all delta tree tables auto databases = context.getDatabases(); double max_dt_stable_oldest_snapshot_lifetime = 0.0; @@ -177,6 +182,34 @@ void AsynchronousMetrics::update() set("MaxDTBackgroundTasksLength", max_dt_background_tasks_length); } + do + { + FileUsageStatistics usage; + + // Get from RegionPersister + auto & tmt = context.getTMTContext(); + auto & kvstore = tmt.getKVStore(); + const auto kvstore_usage = kvstore->getFileUsageStatistics(); + + // 
Get the blob file status from all PS V3 instances + auto global_storage_pool = context.getGlobalStoragePool(); + if (global_storage_pool == nullptr) + break; + + const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); + const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); + const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); + + usage.total_file_num = kvstore_usage.total_file_num + log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; + usage.total_disk_size = kvstore_usage.total_disk_size + log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; + usage.total_valid_size = kvstore_usage.total_valid_size + log_usage.total_valid_size + meta_usage.total_valid_size + data_usage.total_valid_size; + + set("BlobFileNums", usage.total_file_num); + set("BlobDiskBytes", usage.total_disk_size); + set("BlobValidBytes", usage.total_valid_size); + + } while (false); + #if USE_TCMALLOC { /// tcmalloc related metrics. Remove if you switch to different allocator. diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 705b8a533f3..4f1cb3bbfdf 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -477,6 +477,11 @@ void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_s int err_cnt = 0; for (auto & [table_id, storage] : storages) { + // This will skip the init of storages that do not contain any data. TiFlash now syncs the schema and + // creates all tables regardless of whether a table has a TiFlash replica defined, so there may be lots + // of empty tables in TiFlash. + // Note that we still need to init stores that contain data (determined by whether the stable dir of this storage + // exists), or the data used size reported to PD is not correct. try { init_cnt += storage->initStoreIfDataDirExist() ? 
1 : 0; @@ -498,6 +503,7 @@ void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_s if (lazily_init_store) { LOG_FMT_INFO(log, "Lazily init store."); + // run the store initialization in another thread to shorten the start time of TiFlash std::thread(do_init_stores).detach(); } else diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index d05454a5431..77684ea46cb 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -28,6 +28,7 @@ struct Settings; class Context; class StoragePathPool; class StableDiskDelegator; +class AsynchronousMetrics; namespace DM { @@ -50,6 +51,7 @@ class GlobalStoragePool : private boost::noncopyable void restore(); friend class StoragePool; + friend class ::DB::AsynchronousMetrics; // GC immediately // Only used on dbgFuncMisc diff --git a/dbms/src/Storages/Page/FileUsage.h b/dbms/src/Storages/Page/FileUsage.h new file mode 100644 index 00000000000..813da99f65d --- /dev/null +++ b/dbms/src/Storages/Page/FileUsage.h @@ -0,0 +1,30 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include + +#include + +namespace DB +{ + +struct FileUsageStatistics +{ + size_t total_disk_size = 0; + size_t total_valid_size = 0; + size_t total_file_num = 0; +}; + +} // namespace DB diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index 6e1addae093..d8b767a6c15 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -66,6 +66,8 @@ class PageReaderImpl : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. virtual SnapshotsStatistics getSnapshotsStat() const = 0; + virtual FileUsageStatistics getFileUsageStatistics() const = 0; + virtual void traverse(const std::function & acceptor, bool only_v2, bool only_v3) const = 0; }; @@ -137,6 +139,11 @@ class PageReaderImplNormal : public PageReaderImpl storage->traverse(acceptor, nullptr); } + FileUsageStatistics getFileUsageStatistics() const override + { + return storage->getFileUsageStatistics(); + } + private: NamespaceId ns_id; PageStoragePtr storage; @@ -294,6 +301,11 @@ class PageReaderImplMixed : public PageReaderImpl return statistics_total; } + FileUsageStatistics getFileUsageStatistics() const override + { + return storage_v3->getFileUsageStatistics(); + } + void traverse(const std::function & acceptor, bool only_v2, bool only_v3) const override { // Used by RegionPersister::restore @@ -424,6 +436,12 @@ SnapshotsStatistics PageReader::getSnapshotsStat() const return impl->getSnapshotsStat(); } + +FileUsageStatistics PageReader::getFileUsageStatistics() const +{ + return impl->getFileUsageStatistics(); +} + void PageReader::traverse(const std::function & acceptor, bool only_v2, bool only_v3) const { impl->traverse(acceptor, only_v2, only_v3); diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index cec6e297d0e..0059c0570c1 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h 
@@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -251,6 +252,12 @@ class PageStorage : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. virtual SnapshotsStatistics getSnapshotsStat() const = 0; + virtual FileUsageStatistics getFileUsageStatistics() const + { + // return all zeros by default + return FileUsageStatistics{}; + } + virtual size_t getNumberOfPages() = 0; virtual std::set getAliveExternalPageIds(NamespaceId ns_id) = 0; @@ -380,6 +387,8 @@ class PageReader : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. SnapshotsStatistics getSnapshotsStat() const; + FileUsageStatistics getFileUsageStatistics() const; + void traverse(const std::function & acceptor, bool only_v2 = false, bool only_v3 = false) const; private: diff --git a/dbms/src/Storages/Page/Snapshot.h b/dbms/src/Storages/Page/Snapshot.h index 77e68f1b054..073fc0a2830 100644 --- a/dbms/src/Storages/Page/Snapshot.h +++ b/dbms/src/Storages/Page/Snapshot.h @@ -61,7 +61,7 @@ class PageStorageSnapshotMixed : public PageStorageSnapshot }; using PageStorageSnapshotMixedPtr = std::shared_ptr; -static inline PageStorageSnapshotMixedPtr +inline PageStorageSnapshotMixedPtr toConcreteMixedSnapshot(const PageStorageSnapshotPtr & ptr) { return std::static_pointer_cast(ptr); diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index dc5ed536f9e..fd000fb3e7f 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,7 @@ using ChecksumClass = Digest::CRC64; * BlobStore methods * *********************/ -BlobStore::BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config_) +BlobStore::BlobStore(String storage_name, const 
FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, const BlobStore::Config & config_) : delegator(std::move(delegator_)) , file_provider(file_provider_) , config(config_) @@ -115,6 +116,39 @@ void BlobStore::registerPaths() } } +FileUsageStatistics BlobStore::getFileUsageStatistics() const +{ + FileUsageStatistics usage; + + // Get a copy of stats map to avoid the big lock on stats map + const auto stats_list = blob_stats.getStats(); + + for (const auto & [path, stats] : stats_list) + { + (void)path; + for (const auto & stat : stats) + { + // We can access to these type without any locking. + if (stat->isReadOnly() || stat->isBigBlob()) + { + usage.total_disk_size += stat->sm_total_size; + usage.total_valid_size += stat->sm_valid_size; + } + else + { + // Else the stat may being updated, acquire a lock to avoid data race. + auto lock = stat->lock(); + usage.total_disk_size += stat->sm_total_size; + usage.total_valid_size += stat->sm_valid_size; + } + LOG_FMT_TRACE(log, "file usage [blob_id={}] [disk_size={}] [valid_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_size, stat->sm_valid_rate); + } + usage.total_file_num += stats.size(); + } + + return usage; +} + PageEntriesEdit BlobStore::handleLargeWrite(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter) { auto ns_id = wb.getNamespaceId(); @@ -872,6 +906,7 @@ struct BlobStoreGCInfo std::vector BlobStore::getGCStats() { + // Get a copy of stats map to avoid the big lock on stats map const auto stats_list = blob_stats.getStats(); std::vector blob_need_gc; BlobStoreGCInfo blobstore_gc_info; @@ -1196,7 +1231,7 @@ BlobStatPtr BlobStore::BlobStats::createStat(BlobFileId blob_file_id, const std: // New blob file id won't bigger than roll_id if (blob_file_id > roll_id) { - throw Exception(fmt::format("BlobStats won't create [blob_id={}], which is bigger than [RollMaxId={}]", + throw Exception(fmt::format("BlobStats won't create [blob_id={}], which is bigger than [roll_id={}]", 
blob_file_id, roll_id), ErrorCodes::LOGICAL_ERROR); @@ -1259,8 +1294,7 @@ BlobStatPtr BlobStore::BlobStats::createBigPageStatNotChecking(BlobFileId blob_f BlobStatPtr stat = std::make_shared( blob_file_id, SpaceMap::SpaceMapType::SMAP64_BIG, - config.file_limit_size, - BlobStatType::BIG_BLOB); + config.file_limit_size); PageFileIdAndLevel id_lvl{blob_file_id, 0}; stats_map[delegator->choosePath(id_lvl)].emplace_back(stat); @@ -1438,7 +1472,7 @@ bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si if (!smap->markFree(offset, buf_size)) { smap->logDebugString(); - throw Exception(fmt::format("Remove postion from BlobStat failed, [offset={} , buf_size={}, blob_id={}] is invalid.", + throw Exception(fmt::format("Remove postion from BlobStat failed, invalid position [offset={}] [buf_size={}] [blob_id={}]", offset, buf_size, id), @@ -1455,7 +1489,7 @@ void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size if (!smap->markUsed(offset, buf_size)) { smap->logDebugString(); - throw Exception(fmt::format("Restore postion from BlobStat failed, [offset={}] [buf_size={}] [blob_id={}] is used or subspan is used", + throw Exception(fmt::format("Restore postion from BlobStat failed, the space/subspace is already being used [offset={}] [buf_size={}] [blob_id={}]", offset, buf_size, id), diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index e527eb0f3bf..3dbe813ab80 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -110,12 +111,17 @@ class BlobStore : private Allocator double sm_valid_rate = 1.0; public: - BlobStat(BlobFileId id_, SpaceMap::SpaceMapType sm_type, UInt64 sm_max_caps_, BlobStatType type_ = BlobStatType::NORMAL) + BlobStat(BlobFileId id_, SpaceMap::SpaceMapType sm_type, UInt64 sm_max_caps_) : id(id_) - , type(type_) + , type(BlobStatType::NORMAL) , 
smap(SpaceMap::createSpaceMap(sm_type, 0, sm_max_caps_)) , sm_max_caps(sm_max_caps_) { + if (sm_type == SpaceMap::SpaceMapType::SMAP64_BIG) + { + type = BlobStatType::BIG_BLOB; + } + // Won't create read-only blob by default. assert(type != BlobStatType::READ_ONLY); } @@ -246,10 +252,12 @@ class BlobStore : private Allocator std::map> stats_map; }; - BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config); + BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, const BlobStore::Config & config); void registerPaths(); + FileUsageStatistics getFileUsageStatistics() const; + std::vector getGCStats(); PageEntriesEdit gc(std::map & entries_need_gc, diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index cfa07199637..a568bb5087f 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -80,6 +80,11 @@ DB::PageStorage::SnapshotPtr PageStorageImpl::getSnapshot(const String & tracing return page_directory->createSnapshot(tracing_id); } +FileUsageStatistics PageStorageImpl::getFileUsageStatistics() const +{ + return blob_store.getFileUsageStatistics(); +} + SnapshotsStatistics PageStorageImpl::getSnapshotsStat() const { return page_directory->getSnapshotsStat(); diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 50d160e81da..f49601ce2ad 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -72,6 +72,8 @@ class PageStorageImpl : public DB::PageStorage SnapshotsStatistics getSnapshotsStat() const override; + FileUsageStatistics getFileUsageStatistics() const override; + size_t getNumberOfPages() override; std::set getAliveExternalPageIds(NamespaceId ns_id) override; diff --git a/dbms/src/Storages/Transaction/KVStore.h 
b/dbms/src/Storages/Transaction/KVStore.h index 9d30f249e60..ef851d67958 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -162,6 +162,11 @@ class KVStore final : private boost::noncopyable ~KVStore(); + FileUsageStatistics getFileUsageStatistics() const + { + return region_persister.getFileUsageStatistics(); + } + private: friend class MockTiDB; friend struct MockTiDBTable; diff --git a/dbms/src/Storages/Transaction/RegionPersister.cpp b/dbms/src/Storages/Transaction/RegionPersister.cpp index c3db88daece..7ce52c6caa1 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.cpp +++ b/dbms/src/Storages/Transaction/RegionPersister.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -379,4 +380,9 @@ bool RegionPersister::gc() return stable_page_storage->gc(); } +FileUsageStatistics RegionPersister::getFileUsageStatistics() const +{ + return page_reader->getFileUsageStatistics(); +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/RegionPersister.h b/dbms/src/Storages/Transaction/RegionPersister.h index f2828add202..4eb381523dd 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.h +++ b/dbms/src/Storages/Transaction/RegionPersister.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -57,6 +58,8 @@ class RegionPersister final : private boost::noncopyable PageStorage::Config getPageStorageSettings() const; + FileUsageStatistics getFileUsageStatistics() const; + #ifndef DBMS_PUBLIC_GTEST private: #endif From 66203942c4c6c21c88a7066acd7cb903db2ff7b8 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 14:19:22 +0800 Subject: [PATCH 2/9] Remove verbose tracing Signed-off-by: JaySon-Huang --- dbms/src/Storages/Page/V3/BlobStore.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index fd000fb3e7f..588e8bc3114 100644 --- 
a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -141,7 +141,6 @@ FileUsageStatistics BlobStore::getFileUsageStatistics() const usage.total_disk_size += stat->sm_total_size; usage.total_valid_size += stat->sm_valid_size; } - LOG_FMT_TRACE(log, "file usage [blob_id={}] [disk_size={}] [valid_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_size, stat->sm_valid_rate); } usage.total_file_num += stats.size(); } From c0561ac9c4e6b923ec361d00b4e39a20f2702c27 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 14:25:39 +0800 Subject: [PATCH 3/9] Show the file usage in grafana Signed-off-by: JaySon-Huang --- metrics/grafana/tiflash_summary.json | 2303 ++++++++++++++------------ 1 file changed, 1224 insertions(+), 1079 deletions(-) diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index a6d1abac46f..60b74360a39 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -58,7 +58,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1652861766192, + "iteration": 1653629613474, "links": [], "panels": [ { @@ -5336,14 +5336,30 @@ "align": false, "alignLevel": null } - }, + } + ], + "repeat": null, + "title": "Storage", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 119, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The states of BlobStore (an internal component of storage engine)", + "description": "The states of BlobStore (an internal component of PageStorage)", "fieldConfig": { "defaults": {}, "overrides": [] @@ -5354,20 +5370,20 @@ "h": 8, "w": 12, "x": 0, - "y": 69 + "y": 6 }, "hiddenSeries": false, "id": 85, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": 
false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -5383,11 +5399,11 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/^BlobAllocated/", + "alias": "/^allocated/", "yaxis": 1 }, { - "alias": "/^BlobExpandRate/", + "alias": "/^expand_rate/", "yaxis": 2 } ], @@ -5402,7 +5418,7 @@ "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "BlobAllocated-{{instance}}", + "legendFormat": "allocated-{{instance}}", "refId": "A" }, { @@ -5412,7 +5428,7 @@ "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "BlobExpandRate-{{instance}}", + "legendFormat": "expand_rate-{{instance}}", "refId": "B" } ], @@ -5420,7 +5436,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "BlobStore Status", + "title": "PageStorage Blob Status", "tooltip": { "shared": true, "sort": 0, @@ -5457,23 +5473,7 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Storage", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 64, - "panels": [ + }, { "aliasColors": {}, "bars": false, @@ -5481,29 +5481,29 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The throughput of write and delta's background management", + "description": "The disk usage of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 9, - "w": 24, - "x": 0, + "h": 8, + "w": 12, + "x": 12, "y": 6 }, - "height": "", "hiddenSeries": false, - "id": 70, + "id": 128, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, @@ -5516,7 +5516,7 
@@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -5525,11 +5525,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, "seriesOverrides": [ { - "alias": "/total/", + "alias": "/^valid_rate/", "yaxis": 2 + }, + { + "alias": "/size/", + "linewidth": 3 } ], "spaceLength": 10, @@ -5538,47 +5541,51 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[1m]))", - "format": "time_series", + "expr": "tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "throughput_write+ingest", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "disk_size-{{instance}}", + "refId": "A" }, { - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "throughput_delta-management", + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "valid_size-{{instance}}", "refId": "B" }, { "exemplar": true, - "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"})", - "format": "time_series", + "expr": "sum((tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) / 
(tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})) by (instance)", + "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "total_write+ingest", + "legendFormat": "valid_rate-{{instance}}", "refId": "C" }, { - "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"})", + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "total_delta-management", - "refId": "D" + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_file-{{instance}}", + "refId": "E", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write & Delta Management Throughput", + "title": "PageStorage Disk Usage", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -5591,7 +5598,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -5599,11 +5606,11 @@ "show": true }, { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": "1.1", + "min": "0", "show": true } ], @@ -5613,115 +5620,145 @@ } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The stall duration of write and delete range", + "description": "The Global StoragePool and KVStore Runmode", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 5, + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 11, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false + }, + "decimals": 0, + "mappings": [ + { + "from": "", + "id": 1, + "text": "ONLY_V2", + "to": "", + "type": 1, + "value": "1" + }, + { + "from": "", + "id": 2, + "text": "ONLY_V3", + "to": "", + "type": 1, + "value": "2" + }, + { + "from": "", + "id": 3, + "text": "MIX_MODE", + "to": "", + "type": 1, + "value": "3" + }, + { + "from": "", + "id": 4, + "text": " ", + "to": "", + "type": 1, + "value": "4" + }, + { + "from": "", + "id": 5, + "text": " ", + "to": "", + "type": 1, + "value": "5" + } + ], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 15 - }, - "hiddenSeries": false, - "id": 62, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true + "y": 14 }, - "lines": true, - "linewidth": 1, + "id": 126, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "graph": {}, + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltipOptions": { + "mode": "multi" + } }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "99-delta_merge", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", + "exemplar": true, + "expr": "tiflash_system_current_metric_GlobalStorageRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "99-{{type}}-{{instance}}", - "refId": "B" + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}-Global", + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(1, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "max-{{type}}-{{instance}}", - "refId": "A" + "exemplar": false, + "expr": "tiflash_system_current_metric_RegionPersisterRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}-KVStore", + "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Write Stall Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Global Runmode", + "type": "timeseries" }, { "aliasColors": {}, @@ -5730,29 +5767,29 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The throughput of write by 
instance", + "description": "The StoragePool Runmode in DeltaMerge Storage", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 23 + "h": 8, + "w": 12, + "x": 12, + "y": 14 }, - "height": "", "hiddenSeries": false, - "id": 89, + "id": 123, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, @@ -5765,7 +5802,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -5774,43 +5811,45 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, - "seriesOverrides": [ - { - "alias": "/total/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write\"}[1m])) by (instance)", + "expr": "sum(tiflash_system_current_metric_StoragePoolV2Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "throughput_write-{{instance}}", + "intervalFactor": 2, + "legendFormat": "{{instance}}-OnlyV2", "refId": "A", "step": 10 }, { "exemplar": true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (instance)", + "expr": "sum(tiflash_system_current_metric_StoragePoolV3Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", 
- "legendFormat": "throughput_ingest-{{instance}}", + "legendFormat": "{{instance}}-OnlyV3", "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_StoragePoolMixMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}-MixMode", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write Throughput By Instance", + "title": "StoragePool Runmode", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -5825,7 +5864,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5833,7 +5872,7 @@ "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5845,43 +5884,65 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "StoragePool", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 64, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total count of different kinds of commands received", + "decimals": 1, + "description": "The throughput of write and delta's background management", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, - "y": 32 + "y": 71 }, + "height": "", "hiddenSeries": false, - "id": 90, + "id": 70, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, + "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + 
"nullPointMode": "null", "options": { "alertThreshold": true }, @@ -5890,9 +5951,10 @@ "pointradius": 5, "points": false, "renderer": "flot", + "repeatedByRow": true, "seriesOverrides": [ { - "alias": "/delete_range|ingest/", + "alias": "/total/", "yaxis": 2 } ], @@ -5901,19 +5963,37 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_system_profile_event_DMWriteBlock{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[1m]))", "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "write block-{{instance}}", + "legendFormat": "throughput_write+ingest", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "throughput_delta-management", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "total_write+ingest", "refId": "C" }, { - "expr": "sum(increase(tiflash_storage_command_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"})", "format": "time_series", - "hide": false, "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "total_delta-management", "refId": "D" 
} ], @@ -5921,7 +6001,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write Command OPS By Instance", + "title": "Write & Delta Management Throughput", "tooltip": { "shared": true, "sort": 0, @@ -5937,8 +6017,7 @@ }, "yaxes": [ { - "decimals": null, - "format": "ops", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -5946,11 +6025,11 @@ "show": true }, { - "format": "opm", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -5958,52 +6037,38 @@ "align": false, "alignLevel": null } - } - ], - "title": "Storage Write Stall", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 34, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "The stall duration of write and delete range", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 12, + "h": 8, + "w": 24, "x": 0, - "y": 7 + "y": 80 }, "hiddenSeries": false, - "id": 35, + "id": 62, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -6017,16 +6082,29 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "99-delta_merge", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_raft_read_index_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "expr": "histogram_quantile(0.99, 
sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", "format": "time_series", + "hide": true, "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99-{{type}}-{{instance}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "max-{{type}}-{{instance}}", "refId": "A" } ], @@ -6034,7 +6112,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Raft Read Index OPS", + "title": "Write Stall Duration", "tooltip": { "shared": true, "sort": 0, @@ -6050,8 +6128,7 @@ }, "yaxes": [ { - "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -6059,11 +6136,11 @@ "show": true }, { - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -6078,6 +6155,8 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The throughput of write by instance", "fieldConfig": { "defaults": {}, "overrides": [] @@ -6085,28 +6164,34 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 7 + "h": 9, + "w": 24, + "x": 0, + "y": 88 }, + "height": "", "hiddenSeries": false, - "id": 36, + "id": 89, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 
1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, @@ -6115,45 +6200,42 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "repeatedByRow": true, + "seriesOverrides": [ + { + "alias": "/total/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write\"}[1m])) by (instance)", "format": "time_series", + "hide": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "A" + "legendFormat": "throughput_write-{{instance}}", + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "throughput_ingest-{{instance}}", "refId": "B" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95", - "refId": "C" - }, - { - "expr": "histogram_quantile(0.80, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "80", - "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Raft Batch Read Index Duration", + "title": "Write Throughput By Instance", "tooltip": { "shared": true, "sort": 0, @@ -6169,7 +6251,7 @@ }, "yaxes": [ { - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -6177,7 +6259,7 @@ "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -6196,26 +6278,25 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "The total count of different kinds of commands received", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 12, + "h": 9, + "w": 24, "x": 0, - "y": 14 + "y": 97 }, "hiddenSeries": false, - "id": 37, + "id": 90, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, "max": true, "min": false, "rightSide": true, @@ -6237,7 +6318,7 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/timeout/", + "alias": "/delete_range|ingest/", "yaxis": 2 } ], @@ -6246,46 +6327,27 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "max", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.95, 
sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "sum(rate(tiflash_system_profile_event_DMWriteBlock{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", + "hide": false, "intervalFactor": 1, - "legendFormat": "95", + "legendFormat": "write block-{{instance}}", "refId": "C" }, { - "expr": "histogram_quantile(0.80, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "sum(increase(tiflash_storage_command_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", + "hide": false, "intervalFactor": 1, - "legendFormat": "80", + "legendFormat": "{{type}}-{{instance}}", "refId": "D" - }, - { - "expr": "sum(increase(tiflash_system_profile_event_RaftWaitIndexTimeout{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}-timeout", - "refId": "E" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Raft Wait Index Duration", + "title": "Write Command OPS By Instance", "tooltip": { "shared": true, "sort": 0, @@ -6301,7 +6363,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -6309,7 +6372,6 @@ "show": true }, { - "decimals": 2, "format": "opm", "label": null, "logBase": 1, @@ -6322,40 +6384,52 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Storage Write Stall", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 34, + "panels": 
[ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of currently applying snapshots.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 7 }, "hiddenSeries": false, - "id": 75, + "id": 35, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -6375,19 +6449,18 @@ "steppedLine": false, "targets": [ { - "expr": "sum(tiflash_system_current_metric_RaftNumSnapshotsPendingApply{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(rate(tiflash_raft_read_index_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", - "hide": false, "intervalFactor": 1, - "legendFormat": "Pending-{{instance}}", - "refId": "B" + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Applying snapshots Count", + "title": "Raft Read Index OPS", "tooltip": { "shared": true, "sort": 0, @@ -6403,7 +6476,8 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -6411,7 +6485,7 @@ "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -6430,36 +6504,30 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of applying Raft write logs", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, 
"fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 24, - "x": 0, - "y": 21 + "w": 12, + "x": 12, + "y": 7 }, "hiddenSeries": false, - "id": 82, + "id": 36, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -6479,68 +6547,43 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": " 99%-{{type}}", - "metric": "", - "refId": "A", - "step": 4 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "95%-{{type}}", - "refId": "B", - "step": 4 + "intervalFactor": 1, + "legendFormat": "max", + "refId": "A" }, { - "exemplar": true, - "expr": "sum(rate(tiflash_raft_apply_write_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tiflash_raft_apply_write_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) ", + "expr": 
"histogram_quantile(0.99, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg-write", - "refId": "C", - "step": 4 + "intervalFactor": 1, + "legendFormat": "99", + "refId": "B" }, { - "exemplar": true, - "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": " 100%-{{type}}", - "refId": "D", - "step": 4 + "intervalFactor": 1, + "legendFormat": "95", + "refId": "C" }, { - "exemplar": true, - "expr": "sum(rate(tiflash_raft_apply_write_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) / sum(rate(tiflash_raft_apply_write_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) ", + "expr": "histogram_quantile(0.80, sum(rate(tiflash_raft_read_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg-admin", - "refId": "E", - "step": 4 + "intervalFactor": 1, + "legendFormat": "80", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Apply Raft write logs Duration", + "title": "Raft Batch Read Index Duration", "tooltip": { - "msResolution": false, 
"shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -6574,284 +6617,238 @@ } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of applying Raft write logs", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 14 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 81, + "hiddenSeries": false, + "id": 37, "legend": { - "show": true + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/timeout/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + 
"intervalFactor": 1, + "legendFormat": "max", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "99", "refId": "B" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "95", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.80, sum(rate(tiflash_raft_wait_index_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "80", + "refId": "D" + }, + { + "expr": "sum(increase(tiflash_system_profile_event_RaftWaitIndexTimeout{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}-timeout", + "refId": "E" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Apply Raft write logs Duration [Heatmap]", + "title": "Raft Wait Index Duration", "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "s", + "label": null, + 
"logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": 2, + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of applying Raft write logs", + "description": "The number of currently applying snapshots.", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 14 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 93, + "hiddenSeries": false, + "id": 75, "legend": { - "show": true + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "expr": "sum(tiflash_system_current_metric_RaftNumSnapshotsPendingApply{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Pending-{{instance}}", "refId": "B" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Apply Raft admin logs Duration [Heatmap]", + "title": "Applying snapshots Count", "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of pre-decode when applying region snapshot", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 35 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 72, - "legend": { - "show": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ + "yaxes": [ { - "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_predecode\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Snapshot Predecode Duration", - "tooltip": { - "show": true, - 
"showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of SST to DT in pre-decode when applying region snapshot", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 35 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 127, - "legend": { - "show": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, { - "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_predecode_sst2dt\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "B" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Snapshot Predecode SST to DT Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxis": { 
+ "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -6859,34 +6856,32 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The keys flow of different kinds of Raft operations", + "description": "Duration of applying Raft write logs", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 42 + "y": 21 }, - "height": "", "hiddenSeries": false, - "id": 71, + "id": 82, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -6895,7 +6890,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -6904,31 +6899,74 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_raft_process_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", - "hide": false, + "interval": "", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": " 99%-{{type}}", + "metric": "", "refId": "A", - "step": 10 + "step": 4 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", 
+ "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%-{{type}}", + "refId": "B", + "step": 4 + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_raft_apply_write_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tiflash_raft_apply_write_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-write", + "refId": "C", + "step": 4 + }, + { + "exemplar": true, + "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": " 100%-{{type}}", + "refId": "D", + "step": 4 + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_raft_apply_write_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) / sum(rate(tiflash_raft_apply_write_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) ", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-admin", + "refId": "E", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Keys flow", + "title": "Apply Raft write logs Duration", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -6940,7 +6978,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -6952,7 +6990,7 @@ 
"label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -6976,7 +7014,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of pre-decode when applying region snapshot", + "description": "Duration of applying Raft write logs", "fieldConfig": { "defaults": {}, "overrides": [] @@ -6985,12 +7023,12 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 28 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 73, + "id": 81, "legend": { "show": true }, @@ -6998,7 +7036,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_flush\"}[1m])) by (le)", + "expr": "sum(delta(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -7007,7 +7045,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Snapshot Flush Duration", + "title": "Apply Raft write logs Duration [Heatmap]", "tooltip": { "show": true, "showHistogram": true @@ -7041,11 +7079,12 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of ingesting SST", + "description": "Duration of applying Raft write logs", "fieldConfig": { "defaults": {}, "overrides": [] @@ -7054,12 +7093,12 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 28 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 74, + "id": 93, "legend": { "show": true }, @@ -7067,7 +7106,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", type=\"ingest_sst\"}[1m])) by (le)", + "expr": "sum(delta(tiflash_raft_apply_write_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"admin\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -7076,7 +7115,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Ingest SST Duration", + "title": "Apply Raft admin logs Duration [Heatmap]", "tooltip": { "show": true, "showHistogram": true @@ -7115,7 +7154,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of decoding Region data into blocks when writing Region data to the storage layer. (Mixed with \"write logs\" and \"apply Snapshot\" operations)", + "description": "Duration of pre-decode when applying region snapshot", "fieldConfig": { "defaults": {}, "overrides": [] @@ -7124,12 +7163,12 @@ "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 35 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 76, + "id": 72, "legend": { "show": true }, @@ -7137,7 +7176,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_write_data_to_storage_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"decode\"}[1m])) by (le)", + "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_predecode\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -7146,7 +7185,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Region write Duration (decode)", + "title": "Snapshot Predecode Duration", "tooltip": { "show": true, "showHistogram": true @@ -7185,7 +7224,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of writing Region data blocks to the storage layer (Mixed 
with \"write logs\" and \"apply Snapshot\" operations)", + "description": "Duration of SST to DT in pre-decode when applying region snapshot", "fieldConfig": { "defaults": {}, "overrides": [] @@ -7194,82 +7233,12 @@ "h": 7, "w": 12, "x": 12, - "y": 56 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 87, - "legend": { - "show": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tiflash_raft_write_data_to_storage_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Region write Duration (write blocks)", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "Latency that TiKV sends raft log to TiFlash.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 63 + "y": 35 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 92, + "id": 127, "legend": { "show": true }, @@ -7277,7 +7246,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(delta(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le)", + "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_predecode_sst2dt\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -7286,7 +7255,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Upstream Latency [Heatmap]", + "title": "Snapshot Predecode SST to DT Duration", "tooltip": { "show": true, "showHistogram": true @@ -7316,32 +7285,34 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Latency that TiKV sends raft log to TiFlash.", - "editable": true, - "error": false, + "decimals": 1, + "description": "The keys flow of different kinds of Raft operations", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 63 + "w": 24, + "x": 0, + "y": 42 }, + "height": "", "hiddenSeries": false, - "id": 91, + "id": 71, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, + "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -7350,7 +7321,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, @@ -7359,56 +7330,31 @@ "pointradius": 5, "points": false, "renderer": "flot", + "repeatedByRow": true, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1.00, sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "sum(rate(tiflash_raft_process_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", 
"format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": " 100%", - "metric": "", + "legendFormat": "{{type}}", "refId": "A", - "step": 4 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": " 99%", - "metric": "", - "refId": "B", - "step": 4 - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "C", - "step": 4 - }, - { - "expr": "sum(rate(tiflash_raft_upstream_latency_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tiflash_raft_upstream_latency_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) ", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "D", - "step": 4 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Upstream Latency", + "title": "Keys flow", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -7420,11 +7366,11 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -7432,7 +7378,7 @@ "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -7440,169 +7386,215 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Raft", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - 
"id": 95, - "panels": [ + }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of pre-decode when applying region snapshot", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 80 + "y": 49 }, - "hiddenSeries": false, - "id": 99, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 73, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true + "show": true }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/^RS Filter/", - "yaxis": 2 - }, - { - "alias": "/^PK/", - "yaxis": 2 - }, - { - "alias": "/^No Filter/", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "reverseYBuckets": false, "targets": [ { - "expr": "avg((rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]) - rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / (rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (instance)", - "format": "time_series", - "hide": false, - 
"intervalFactor": 1, - "legendFormat": "1min-{{instance}}", + "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_flush\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "B" - }, - { - "expr": "avg((rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) - rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])) / (rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]))) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "5min-{{instance}}", - "refId": "C" - }, - { - "expr": "sum(rate(tiflash_system_profile_event_DMFileFilterNoFilter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "No Filter-{{instance}}", - "refId": "A" - }, - { - "expr": "sum(rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "PK Filter-{{instance}}", - "refId": "D" - }, - { - "expr": "sum(rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "RS Filter-{{instance}}", - "refId": "E" } ], - "thresholds": [], "timeFrom": null, 
- "timeRegions": [], "timeShift": null, - "title": "Rough Set Filter Rate", + "title": "Snapshot Flush Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of ingesting SST", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 74, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, + "expr": "sum(delta(tiflash_raft_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"ingest_sst\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Ingest SST Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + 
"max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of decoding Region data into blocks when writing Region data to the storage layer. (Mixed with \"write logs\" and \"apply Snapshot\" operations)", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 76, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ { - "decimals": null, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true + "expr": "sum(delta(tiflash_raft_write_data_to_storage_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"decode\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "B" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "timeFrom": null, + "timeShift": null, + "title": "Region write Duration (decode)", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cards": { @@ -7612,46 +7604,48 @@ "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": 
"interpolateSpectral", "exponent": 0.5, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of writing Region data blocks to the storage layer (Mixed with \"write logs\" and \"apply Snapshot\" operations)", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 80 + "y": 56 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 97, + "id": 87, "legend": { - "show": false + "show": true }, - "pluginVersion": "6.1.6", + "links": [], "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tiflash_storage_rough_set_filter_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "expr": "sum(delta(tiflash_raft_write_data_to_storage_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "refId": "B" } ], - "title": "Rough Set Filter Rate Histogram", + "timeFrom": null, + "timeShift": null, + "title": "Region write Duration (write blocks)", "tooltip": { "show": true, - "showHistogram": false + "showHistogram": true }, "type": "heatmap", "xAxis": { @@ -7661,198 +7655,254 @@ "xBucketSize": null, "yAxis": { "decimals": 0, - "format": "percent", + "format": "s", "logBase": 1, "max": null, "min": null, "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null - } - ], - "title": "Rough Set Filter Rate Histogram", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 119, - "panels": [ + }, { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + 
"cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The Global StoragePool and KVStore Runmode", + "description": "Latency that TiKV sends raft log to TiFlash.", "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 5, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 11, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false - }, - "decimals": 0, - "mappings": [ - { - "from": "", - "id": 1, - "text": "ONLY_V2", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 2, - "text": "ONLY_V3", - "to": "", - "type": 1, - "value": "2" - }, - { - "from": "", - "id": 3, - "text": "MIX_MODE", - "to": "", - "type": 1, - "value": "3" - }, - { - "from": "", - "id": 4, - "text": " ", - "to": "", - "type": 1, - "value": "4" - }, - { - "from": "", - "id": 5, - "text": " ", - "to": "", - "type": 1, - "value": "5" - } - ], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, + "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 9 + "y": 63 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 92, + "legend": { + "show": true }, - "id": 126, "links": [], - "options": { - "graph": {}, - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltipOptions": { - "mode": "multi" + 
"reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "B" } + ], + "timeFrom": null, + "timeShift": null, + "title": "Upstream Latency [Heatmap]", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Latency that TiKV sends raft log to TiFlash.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 }, + "hiddenSeries": false, + "id": 91, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tiflash_system_current_metric_GlobalStorageRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "histogram_quantile(1.00, 
sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", - "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}-GlobalRunMode", + "legendFormat": " 100%", + "metric": "", "refId": "A", - "step": 10 + "step": 4 }, { - "exemplar": false, - "expr": "tiflash_system_current_metric_RegionPersisterRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-KVStoreRunMode", - "refId": "B" + "expr": "histogram_quantile(0.99, sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": " 99%", + "metric": "", + "refId": "B", + "step": 4 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tiflash_raft_upstream_latency_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "95%", + "refId": "C", + "step": 4 + }, + { + "expr": "sum(rate(tiflash_raft_upstream_latency_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tiflash_raft_upstream_latency_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "refId": "D", + "step": 4 } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Global Runmode", - "type": "timeseries" - }, + "title": "Upstream Latency", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Raft", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 95, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The StoragePool Runmode in DeltaMerge Storage", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 0, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 9 + "x": 0, + "y": 80 }, "hiddenSeries": false, - "id": 123, + "id": 99, "legend": { "alignAsTable": true, "avg": false, @@ -7861,9 +7911,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -7879,45 +7926,73 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/^RS Filter/", + "yaxis": 2 + }, + { + "alias": "/^PK/", + "yaxis": 2 + }, + { + "alias": "/^No Filter/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolV2Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "avg((rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]) - rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) / (rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (instance)", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-OnlyV2", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolV3Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, - "interval": "", - "legendFormat": "{{instance}}-OnlyV3", + "intervalFactor": 1, + "legendFormat": "1min-{{instance}}", "refId": "B" }, { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolMixMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-MixMode", + "expr": "avg((rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) - rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])) / (rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]))) by (instance)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "5min-{{instance}}", "refId": "C" + }, + { + "expr": "sum(rate(tiflash_system_profile_event_DMFileFilterNoFilter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "No Filter-{{instance}}", + "refId": "A" + }, + { + "expr": 
"sum(rate(tiflash_system_profile_event_DMFileFilterAftPKAndPackSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "PK Filter-{{instance}}", + "refId": "D" + }, + { + "expr": "sum(rate(tiflash_system_profile_event_DMFileFilterAftRoughSet{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "RS Filter-{{instance}}", + "refId": "E" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "StoragePool Runmode", + "title": "Rough Set Filter Rate", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -7932,7 +8007,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -7940,8 +8016,9 @@ "show": true }, { + "decimals": null, "format": "short", - "label": null, + "label": "", "logBase": 1, "max": null, "min": null, @@ -7952,13 +8029,81 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 80 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 97, + "legend": { + "show": false + }, + "pluginVersion": "6.1.6", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tiflash_storage_rough_set_filter_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Rough Set Filter Rate Histogram", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "percent", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null } ], - "title": "StoragePool", + "title": "Rough Set Filter Rate Histogram", "type": "row" } ], - "refresh": false, + "refresh": "30s", "schemaVersion": 27, "style": "dark", "tags": [], From 8f780d8585fd24f2237d9e67704db5f2e7dae1d5 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 14:49:22 +0800 Subject: [PATCH 4/9] Refine code Signed-off-by: JaySon-Huang --- dbms/src/Interpreters/AsynchronousMetrics.cpp | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index c94f87bacf4..80278eea08a 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -184,25 +184,24 @@ void AsynchronousMetrics::update() do { - FileUsageStatistics usage; + FileUsageStatistics usage = kvstore->getFileUsageStatistics(); // Get from RegionPersister auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - const auto kvstore_usage = kvstore->getFileUsageStatistics(); // Get the blob file status from all PS V3 instances - auto global_storage_pool = context.getGlobalStoragePool(); - if (global_storage_pool == nullptr) - break; - const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); - const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); - const 
auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); + if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr) + { + const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); + const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); + const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); - usage.total_file_num = kvstore_usage.total_file_num + log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; - usage.total_disk_size = kvstore_usage.total_disk_size + log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; - usage.total_valid_size = kvstore_usage.total_valid_size + log_usage.total_valid_size + meta_usage.total_valid_size + data_usage.total_valid_size; + usage.total_file_num += log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; + usage.total_disk_size += log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; + usage.total_valid_size += log_usage.total_valid_size + meta_usage.total_valid_size + data_usage.total_valid_size; + } set("BlobFileNums", usage.total_file_num); set("BlobDiskBytes", usage.total_disk_size); From 4db7bb55f6963170ff4a08128ed734f515b8a616 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 15:23:05 +0800 Subject: [PATCH 5/9] update grafana Signed-off-by: JaySon-Huang --- metrics/grafana/tiflash_summary.json | 237 +++++++++++---------------- 1 file changed, 98 insertions(+), 139 deletions(-) diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 60b74360a39..f899a47ed10 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -33,12 +33,6 @@ "id": "prometheus", "name": "Prometheus", "version": "1.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" } 
], "annotations": { @@ -58,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1653629613474, + "iteration": 1653635389238, "links": [], "panels": [ { @@ -5508,8 +5502,6 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -5620,145 +5612,112 @@ } }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The Global StoragePool and KVStore Runmode", + "decimals": 1, + "description": "The number of files of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 5, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 11, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false - }, - "decimals": 0, - "mappings": [ - { - "from": "", - "id": 1, - "text": "ONLY_V2", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 2, - "text": "ONLY_V3", - "to": "", - "type": 1, - "value": "2" - }, - { - "from": "", - "id": 3, - "text": "MIX_MODE", - "to": "", - "type": 1, - "value": "3" - }, - { - "from": "", - "id": 4, - "text": " ", - "to": "", - "type": 1, - "value": "4" - }, - { - "from": "", - "id": 5, - "text": " ", - "to": "", - "type": 1, - "value": "5" - } - ], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, + "defaults": {}, "overrides": [] }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, 
"w": 12, "x": 0, "y": 14 }, - "id": 126, + "hiddenSeries": false, + "id": 129, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [], + "nullPointMode": "null as zero", "options": { - "graph": {}, - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltipOptions": { - "mode": "multi" - } + "alertThreshold": true }, + "percentage": false, "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tiflash_system_current_metric_GlobalStorageRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}-Global", + "legendFormat": "num_file-{{instance}}", "refId": "A", "step": 10 - }, - { - "exemplar": false, - "expr": "tiflash_system_current_metric_RegionPersisterRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-KVStore", - "refId": "B" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Global Runmode", - "type": "timeseries" + "title": "PageStorage File Num", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1.1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -5767,7 +5726,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The StoragePool Runmode in DeltaMerge Storage", + "description": "The number of tables running under different mode in DeltaTree", "editable": true, "error": false, "fieldConfig": { @@ -6416,7 +6375,7 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 8 }, "hiddenSeries": false, "id": 35, @@ -6514,7 +6473,7 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 8 }, "hiddenSeries": false, "id": 36, @@ -6632,7 +6591,7 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 15 }, "hiddenSeries": false, "id": 37, @@ -6766,7 +6725,7 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 15 }, "hiddenSeries": false, "id": 75, @@ -6870,7 +6829,7 @@ "h": 7, "w": 24, "x": 0, - "y": 21 + "y": 22 }, "hiddenSeries": false, "id": 82, @@ -7023,7 +6982,7 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 29 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7093,7 +7052,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 29 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7163,7 +7122,7 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7233,7 +7192,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7297,7 +7256,7 @@ "h": 7, "w": 24, "x": 0, - "y": 42 + "y": 43 }, "height": "", "hiddenSeries": false, @@ -7411,7 +7370,7 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 50 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7480,7 +7439,7 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 50 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7550,7 +7509,7 @@ "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 57 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7620,7 
+7579,7 @@ "h": 7, "w": 12, "x": 12, - "y": 56 + "y": 57 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7690,7 +7649,7 @@ "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 64 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7756,7 +7715,7 @@ "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 64 }, "hiddenSeries": false, "id": 91, From fce4916ff67388fe28e5c39151bb064c2017c598 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 16:03:42 +0800 Subject: [PATCH 6/9] fix Signed-off-by: JaySon-Huang --- dbms/src/Interpreters/AsynchronousMetrics.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 80278eea08a..22c6259c6a5 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -184,14 +184,13 @@ void AsynchronousMetrics::update() do { - FileUsageStatistics usage = kvstore->getFileUsageStatistics(); // Get from RegionPersister auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); + FileUsageStatistics usage = kvstore->getFileUsageStatistics(); // Get the blob file status from all PS V3 instances - if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr) { const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); From 40688ec14c30e24614e3093090eebc685cced0bb Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 16:09:05 +0800 Subject: [PATCH 7/9] Add comment Signed-off-by: JaySon-Huang --- dbms/src/Server/Server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 4f1cb3bbfdf..8656d071cd5 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -1443,6 +1443,7 @@ int Server::main(const std::vector & /*args*/) } /// This object will periodically calculate some metrics. 
+ /// should init after `createTMTContext` cause we collect some data from the TiFlash context object. AsynchronousMetrics async_metrics(*global_context); attachSystemTablesAsync(*global_context->getDatabase("system"), async_metrics); From 9b43d99b05f1ec59de7804bfc419f464b1bf9468 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 17:04:54 +0800 Subject: [PATCH 8/9] Address comment Signed-off-by: JaySon-Huang --- dbms/src/Interpreters/AsynchronousMetrics.cpp | 43 ++++++++++--------- dbms/src/Interpreters/AsynchronousMetrics.h | 5 +++ dbms/src/Storages/Page/FileUsage.h | 1 - .../Storages/Transaction/RegionPersister.h | 2 +- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 22c6259c6a5..8095fbb0e59 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -129,6 +129,26 @@ static void calculateMaxAndSum(Max & max, Sum & sum, T x) max = x; } +FileUsageStatistics AsynchronousMetrics::getPageStorageFileUsage() +{ + // Get from RegionPersister + auto & tmt = context.getTMTContext(); + auto & kvstore = tmt.getKVStore(); + FileUsageStatistics usage = kvstore->getFileUsageStatistics(); + + // Get the blob file status from all PS V3 instances + if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr) + { + const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); + const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); + const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); + + usage.total_file_num += log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; + usage.total_disk_size += log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; + usage.total_valid_size += log_usage.total_valid_size + 
meta_usage.total_valid_size + data_usage.total_valid_size; + } + return usage; +} void AsynchronousMetrics::update() { @@ -182,31 +202,12 @@ void AsynchronousMetrics::update() set("MaxDTBackgroundTasksLength", max_dt_background_tasks_length); } - do { - - // Get from RegionPersister - auto & tmt = context.getTMTContext(); - auto & kvstore = tmt.getKVStore(); - FileUsageStatistics usage = kvstore->getFileUsageStatistics(); - - // Get the blob file status from all PS V3 instances - if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr) - { - const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); - const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); - const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); - - usage.total_file_num += log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; - usage.total_disk_size += log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; - usage.total_valid_size += log_usage.total_valid_size + meta_usage.total_valid_size + data_usage.total_valid_size; - } - + const FileUsageStatistics usage = getPageStorageFileUsage(); set("BlobFileNums", usage.total_file_num); set("BlobDiskBytes", usage.total_disk_size); set("BlobValidBytes", usage.total_valid_size); - - } while (false); + } #if USE_TCMALLOC { diff --git a/dbms/src/Interpreters/AsynchronousMetrics.h b/dbms/src/Interpreters/AsynchronousMetrics.h index 5de328601a6..536e6a6b6f6 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.h +++ b/dbms/src/Interpreters/AsynchronousMetrics.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -47,6 +49,9 @@ class AsynchronousMetrics /// Returns copy of all values. 
Container getValues() const; +private: + FileUsageStatistics getPageStorageFileUsage(); + private: Context & context; diff --git a/dbms/src/Storages/Page/FileUsage.h b/dbms/src/Storages/Page/FileUsage.h index 813da99f65d..6319f4a4acf 100644 --- a/dbms/src/Storages/Page/FileUsage.h +++ b/dbms/src/Storages/Page/FileUsage.h @@ -19,7 +19,6 @@ namespace DB { - struct FileUsageStatistics { size_t total_disk_size = 0; diff --git a/dbms/src/Storages/Transaction/RegionPersister.h b/dbms/src/Storages/Transaction/RegionPersister.h index 4eb381523dd..a6b400345f8 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.h +++ b/dbms/src/Storages/Transaction/RegionPersister.h @@ -16,10 +16,10 @@ #include #include +#include #include #include #include -#include namespace DB { From dcd875f09db095837d32b1e9a9ab70422389b40a Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 27 May 2022 17:53:39 +0800 Subject: [PATCH 9/9] Fix lint Signed-off-by: JaySon-Huang --- dbms/src/Server/Server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 8656d071cd5..fcf820eb958 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -464,7 +464,7 @@ struct RaftStoreProxyRunner : boost::noncopyable } RunRaftStoreProxyParms parms; - pthread_t thread; + pthread_t thread{}; Poco::Logger * log; }; @@ -1155,7 +1155,7 @@ int Server::main(const std::vector & /*args*/) /// Try to increase limit on number of open files. { - rlimit rlim; + rlimit rlim{}; if (getrlimit(RLIMIT_NOFILE, &rlim)) throw Poco::Exception("Cannot getrlimit");