From a3797aaf1c16539d16c2e20d2e5ec2e102d1b2df Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Wed, 29 Dec 2021 17:02:27 +0800 Subject: [PATCH] add some space level metrics --- src/graph/executor/Executor.cpp | 20 +++++++++++++++++++- src/graph/optimizer/rule/IndexScanRule.cpp | 1 + src/graph/service/GraphService.cpp | 10 ++++++++-- src/graph/service/QueryInstance.cpp | 19 ++++++++++++++++++- src/graph/session/ClientSession.cpp | 9 +++++++++ src/graph/stats/GraphStats.cpp | 5 +++-- src/graph/stats/GraphStats.h | 2 +- 7 files changed, 59 insertions(+), 7 deletions(-) diff --git a/src/graph/executor/Executor.cpp b/src/graph/executor/Executor.cpp index b482cf1ab1c..4e1dfee089b 100644 --- a/src/graph/executor/Executor.cpp +++ b/src/graph/executor/Executor.cpp @@ -155,16 +155,25 @@ Executor *Executor::makeExecutor(const PlanNode *node, // static Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) { auto pool = qctx->objPool(); + auto &spaceName = qctx->rctx() ? qctx->rctx()->session()->spaceName() : ""; switch (node->kind()) { case PlanNode::Kind::kPassThrough: { return pool->add(new PassThroughExecutor(node, qctx)); } case PlanNode::Kind::kAggregate: { stats::StatsManager::addValue(kNumAggregateExecutors); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumAggregateExecutors, {{"space", spaceName}})); + } return pool->add(new AggregateExecutor(node, qctx)); } case PlanNode::Kind::kSort: { stats::StatsManager::addValue(kNumSortExecutors); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumSortExecutors, {{"space", spaceName}})); + } return pool->add(new SortExecutor(node, qctx)); } case PlanNode::Kind::kTopN: { @@ -208,6 +217,10 @@ Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) { case PlanNode::Kind::kTagIndexPrefixScan: case PlanNode::Kind::kTagIndexRangeScan: { stats::StatsManager::addValue(kNumIndexScanExecutors); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumIndexScanExecutors, {{"space", spaceName}})); + } return pool->add(new IndexScanExecutor(node, qctx)); } case PlanNode::Kind::kStart: { @@ -594,7 +607,12 @@ Status Executor::close() { Status Executor::checkMemoryWatermark() { if (node_->isQueryNode() && MemoryUtils::kHitMemoryHighWatermark.load()) { - stats::StatsManager::addValue(kNumOomExecutors); + stats::StatsManager::addValue(kNumQueriesHitMemoryWatermark); + auto &spaceName = qctx()->rctx() ? qctx()->rctx()->session()->spaceName() : ""; + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue(stats::StatsManager::counterWithLabels( + kNumQueriesHitMemoryWatermark, {{"space", spaceName}})); + } return Status::Error("Used memory hits the high watermark(%lf) of total system memory.", FLAGS_system_memory_high_watermark_ratio); } diff --git a/src/graph/optimizer/rule/IndexScanRule.cpp b/src/graph/optimizer/rule/IndexScanRule.cpp index f1ae0ea24af..5ad02a164f4 100644 --- a/src/graph/optimizer/rule/IndexScanRule.cpp +++ b/src/graph/optimizer/rule/IndexScanRule.cpp @@ -4,6 +4,7 @@ */ #include "graph/optimizer/rule/IndexScanRule.h" + #include "graph/optimizer/OptContext.h" #include "graph/optimizer/OptGroup.h" #include "graph/optimizer/OptRule.h" diff --git a/src/graph/service/GraphService.cpp b/src/graph/service/GraphService.cpp index 73f903526e6..7229b548ddb 100644 --- a/src/graph/service/GraphService.cpp +++ b/src/graph/service/GraphService.cpp @@ -134,8 +134,6 @@ folly::Future GraphService::future_executeWithParameter( ctx->setRunner(getThreadManager()); ctx->setSessionMgr(sessionManager_.get()); auto future = ctx->future(); - stats::StatsManager::addValue(kNumQueries); - stats::StatsManager::addValue(kNumActiveQueries); // When the sessionId is 0, it means the clients to ping the connection is ok if (sessionId == 0) { ctx->resp().errorCode = ErrorCode::E_SESSION_INVALID; @@ -161,14 +159,22 @@ folly::Future GraphService::future_executeWithParameter( return ctx->finish(); } stats::StatsManager::addValue(kNumQueries); + stats::StatsManager::addValue(kNumActiveQueries); if (FLAGS_enable_space_level_metrics && sessionPtr->space().name != "") { stats::StatsManager::addValue(stats::StatsManager::counterWithLabels( kNumQueries, {{"space", sessionPtr->space().name}})); + stats::StatsManager::addValue(stats::StatsManager::counterWithLabels( + kNumActiveQueries, {{"space", sessionPtr->space().name}})); } + auto& spaceName = sessionPtr->space().name; ctx->setSession(std::move(sessionPtr)); ctx->setParameterMap(parameterMap); queryEngine_->execute(std::move(ctx)); stats::StatsManager::decValue(kNumActiveQueries); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::decValue( + stats::StatsManager::counterWithLabels(kNumActiveQueries, {{"space", spaceName}})); + } }; sessionManager_->findSession(sessionId, getThreadManager()).thenValue(std::move(cb)); return future; diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index 09d6769f90d..aa56c9e58d5 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -64,6 +64,7 @@ void QueryInstance::execute() { Status QueryInstance::validateAndOptimize() { auto *rctx = qctx()->rctx(); + auto &spaceName = rctx->session()->space().name; VLOG(1) << "Parsing query: " << rctx->query(); auto result = GQLParser(qctx()).parse(rctx->query()); NG_RETURN_IF_ERROR(result); @@ -71,13 +72,25 @@ Status QueryInstance::validateAndOptimize() { if (sentence_->kind() == Sentence::Kind::kSequential) { size_t num = static_cast(sentence_.get())->numSentences(); stats::StatsManager::addValue(kNumSentences, num); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumSentences, {{"space", spaceName}}), num); + } } else { stats::StatsManager::addValue(kNumSentences); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumSentences, {{"space", spaceName}})); + } } NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx())); NG_RETURN_IF_ERROR(findBestPlan()); stats::StatsManager::addValue(kOptimizerLatencyUs, *(qctx_->plan()->optimizeTimeInUs())); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::histoWithLabels(kOptimizerLatencyUs, {{"space", spaceName}})); + } return Status::OK(); } @@ -117,6 +130,7 @@ void QueryInstance::onFinish() { void QueryInstance::onError(Status status) { LOG(ERROR) << status; auto *rctx = qctx()->rctx(); + auto &spaceName = rctx->session()->space().name; switch (status.code()) { case Status::Code::kOk: rctx->resp().errorCode = ErrorCode::SUCCEEDED; @@ -135,6 +149,10 @@ void QueryInstance::onError(Status status) { break; case Status::Code::kLeaderChanged: stats::StatsManager::addValue(kNumQueryErrorsLeaderChanges); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue(stats::StatsManager::counterWithLabels( + kNumQueryErrorsLeaderChanges, {{"space", spaceName}})); + } [[fallthrough]]; case Status::Code::kBalanced: case Status::Code::kEdgeNotFound: @@ -157,7 +175,6 @@ void QueryInstance::onError(Status status) { rctx->resp().errorCode = ErrorCode::E_EXECUTION_ERROR; break; } - auto &spaceName = rctx->session()->space().name; rctx->resp().spaceName = std::make_unique(spaceName); rctx->resp().errorMsg = std::make_unique(status.toString()); auto latency = rctx->duration().elapsedInUSec(); diff --git a/src/graph/session/ClientSession.cpp b/src/graph/session/ClientSession.cpp index d812ee586a8..1e39ff0c49c 100644 --- a/src/graph/session/ClientSession.cpp +++ b/src/graph/session/ClientSession.cpp @@ -78,6 +78,10 @@ void ClientSession::markQueryKilled(nebula::ExecutionPlanID epId) { } context->second->markKilled(); stats::StatsManager::addValue(kNumKilledQueries); + if (FLAGS_enable_space_level_metrics && space_.name != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumKilledQueries, {{"space", space_.name}})); + } VLOG(1) << "Mark query killed in local cache, epId: " << epId; auto query = session_.queries_ref()->find(epId); @@ -95,6 +99,11 @@ void ClientSession::markAllQueryKilled() { session_.queries_ref()->clear(); } stats::StatsManager::addValue(kNumKilledQueries, contexts_.size()); + if (FLAGS_enable_space_level_metrics && space_.name != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumKilledQueries, {{"space", space_.name}}), + contexts_.size()); + } } } // namespace graph } // namespace nebula diff --git a/src/graph/stats/GraphStats.cpp b/src/graph/stats/GraphStats.cpp index adab4a84352..f03cd46885b 100644 --- a/src/graph/stats/GraphStats.cpp +++ b/src/graph/stats/GraphStats.cpp @@ -26,13 +26,13 @@ stats::CounterId kNumSentences; stats::CounterId kQueryLatencyUs; stats::CounterId kSlowQueryLatencyUs; stats::CounterId kNumKilledQueries; +stats::CounterId kNumQueriesHitMemoryWatermark; stats::CounterId kOptimizerLatencyUs; stats::CounterId kNumAggregateExecutors; stats::CounterId kNumSortExecutors; stats::CounterId kNumIndexScanExecutors; -stats::CounterId kNumOomExecutors; stats::CounterId kNumOpenedSessions; stats::CounterId kNumAuthFailedSessions; @@ -54,6 +54,8 @@ void initGraphStats() { kSlowQueryLatencyUs = stats::StatsManager::registerHisto( "slow_query_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); kNumKilledQueries = stats::StatsManager::registerStats("num_killed_queries", "rate, sum"); + kNumQueriesHitMemoryWatermark = + stats::StatsManager::registerStats("num_queries_hit_memory_watermark", "rate, sum"); kOptimizerLatencyUs = stats::StatsManager::registerHisto( "optimizer_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999"); @@ -63,7 +65,6 @@ void initGraphStats() { kNumSortExecutors = stats::StatsManager::registerStats("num_sort_executors", "rate, sum"); kNumIndexScanExecutors = stats::StatsManager::registerStats("num_indexscan_executors", "rate, sum"); - kNumOomExecutors = stats::StatsManager::registerStats("num_oom_executors", "rate, sum"); kNumOpenedSessions = stats::StatsManager::registerStats("num_opened_sessions", "rate, sum"); kNumAuthFailedSessions = diff --git a/src/graph/stats/GraphStats.h b/src/graph/stats/GraphStats.h index 70bb6a1efd1..2d89e60e4c5 100644 --- a/src/graph/stats/GraphStats.h +++ b/src/graph/stats/GraphStats.h @@ -23,6 +23,7 @@ extern stats::CounterId kNumSentences; extern stats::CounterId kQueryLatencyUs; extern stats::CounterId kSlowQueryLatencyUs; extern stats::CounterId kNumKilledQueries; +extern stats::CounterId kNumQueriesHitMemoryWatermark; extern stats::CounterId kOptimizerLatencyUs; @@ -30,7 +31,6 @@ extern stats::CounterId kOptimizerLatencyUs; extern stats::CounterId kNumAggregateExecutors; extern stats::CounterId kNumSortExecutors; extern stats::CounterId kNumIndexScanExecutors; -extern stats::CounterId kNumOomExecutors; // Server client traffic // extern stats::CounterId kReceivedBytes;