Skip to content

Commit 393118e

Browse files
committed
[core][stats-die/04] kill STATS in the common component
Signed-off-by: Cuong Nguyen <can@anyscale.com>
1 parent ef36a1c commit 393118e

File tree

17 files changed

+118
-79
lines changed

17 files changed

+118
-79
lines changed

src/ray/common/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ ray_cc_library(
281281
],
282282
deps = [
283283
":event_stats",
284+
":metrics",
284285
":ray_config",
285286
"//src/ray/util:array",
286287
"//src/ray/util:function_traits",
@@ -311,6 +312,7 @@ ray_cc_library(
311312
"event_stats.h",
312313
],
313314
deps = [
315+
":metrics",
314316
":ray_config",
315317
"//src/ray/stats:stats_metric",
316318
"//src/ray/util:time",

src/ray/common/asio/instrumented_io_context.cc

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "ray/common/asio/asio_chaos.h"
2121
#include "ray/common/asio/asio_util.h"
2222
#include "ray/stats/metric.h"
23-
#include "ray/stats/metric_defs.h"
2423

2524
namespace {
2625

@@ -35,7 +34,7 @@ void LagProbeLoop(instrumented_io_context &io_context,
3534
auto end = std::chrono::steady_clock::now();
3635
auto duration =
3736
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin);
38-
ray::stats::STATS_io_context_event_loop_lag_ms.Record(
37+
io_context.io_context_event_loop_lag_ms_gauge_metric.Record(
3938
duration.count(),
4039
{
4140
{"Name", context_name.value_or(GetThreadName())},
@@ -106,8 +105,9 @@ void instrumented_io_context::post(std::function<void()> handler,
106105
auto stats_handle =
107106
event_stats_->RecordStart(std::move(name), emit_metrics_, 0, context_name_);
108107
handler = [handler = std::move(handler),
108+
event_stats = event_stats_,
109109
stats_handle = std::move(stats_handle)]() mutable {
110-
EventTracker::RecordExecution(handler, std::move(stats_handle));
110+
event_stats->RecordExecution(handler, std::move(stats_handle));
111111
};
112112
}
113113

@@ -129,9 +129,10 @@ void instrumented_io_context::dispatch(std::function<void()> handler, std::strin
129129
// GuardedHandlerStats synchronizes internal access, we can concurrently write to the
130130
// handler stats it->second from multiple threads without acquiring a table-level
131131
// readers lock in the callback.
132-
boost::asio::dispatch(
133-
*this,
134-
[handler = std::move(handler), stats_handle = std::move(stats_handle)]() mutable {
135-
EventTracker::RecordExecution(handler, std::move(stats_handle));
136-
});
132+
boost::asio::dispatch(*this,
133+
[event_stats = event_stats_,
134+
handler = std::move(handler),
135+
stats_handle = std::move(stats_handle)]() mutable {
136+
event_stats->RecordExecution(handler, std::move(stats_handle));
137+
});
137138
}

src/ray/common/asio/instrumented_io_context.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <string>
2020

2121
#include "ray/common/event_stats.h"
22+
#include "ray/common/metrics.h"
2223
#include "ray/common/ray_config.h"
2324
#include "ray/util/logging.h"
2425

@@ -56,7 +57,10 @@ class instrumented_io_context : public boost::asio::io_context {
5657
/// for the provided handler.
5758
void dispatch(std::function<void()> handler, std::string name);
5859

59-
EventTracker &stats() const { return *event_stats_; };
60+
std::shared_ptr<EventTracker> stats() const { return event_stats_; };
61+
62+
ray::stats::Gauge io_context_event_loop_lag_ms_gauge_metric{
63+
ray::GetIoContextEventLoopLagMsGaugeMetric()};
6064

6165
private:
6266
/// The event stats tracker to use to record asio handler stats to.

src/ray/common/asio/periodical_runner.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ void PeriodicalRunner::DoRunFnPeriodicallyInstrumented(
107107
// which the handler was elgible to execute on the event loop but was queued by the
108108
// event loop.
109109
auto stats_handle =
110-
io_service_.stats().RecordStart(name, false, period.total_nanoseconds());
110+
io_service_.stats()->RecordStart(name, false, period.total_nanoseconds());
111111
timer->async_wait(
112112
[weak_self = weak_from_this(),
113113
fn = std::move(fn),
@@ -116,7 +116,7 @@ void PeriodicalRunner::DoRunFnPeriodicallyInstrumented(
116116
stats_handle = std::move(stats_handle),
117117
name = std::move(name)](const boost::system::error_code &error) mutable {
118118
if (auto self = weak_self.lock(); self) {
119-
self->io_service_.stats().RecordExecution(
119+
self->io_service_.stats()->RecordExecution(
120120
[self,
121121
fn = std::move(fn),
122122
error,

src/ray/common/event_stats.cc

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#include <utility>
2222

2323
#include "ray/stats/metric.h"
24-
#include "ray/stats/metric_defs.h"
2524
#include "ray/util/time.h"
2625

2726
namespace {
@@ -66,9 +65,9 @@ std::shared_ptr<StatsHandle> EventTracker::RecordStart(
6665
}
6766

6867
if (emit_metrics) {
69-
ray::stats::STATS_operation_count.Record(1, event_context_name.value_or(name));
70-
ray::stats::STATS_operation_active_count.Record(curr_count,
71-
event_context_name.value_or(name));
68+
operation_count_metric_.Record(1, {{"Name", event_context_name.value_or(name)}});
69+
operation_active_gauge_metric_.Record(curr_count,
70+
{{"Name", event_context_name.value_or(name)}});
7271
}
7372

7473
return std::make_shared<StatsHandle>(
@@ -89,10 +88,11 @@ void EventTracker::RecordEnd(std::shared_ptr<StatsHandle> handle) {
8988

9089
if (handle->emit_stats) {
9190
// Update event-specific stats.
92-
ray::stats::STATS_operation_run_time_ms.Record(
93-
execution_time_ns / 1000000, handle->context_name.value_or(handle->event_name));
94-
ray::stats::STATS_operation_active_count.Record(
95-
curr_count, handle->context_name.value_or(handle->event_name));
91+
operation_run_time_ms_histogram_metric_.Record(
92+
execution_time_ns / 1000000,
93+
{{"Name", handle->context_name.value_or(handle->event_name)}});
94+
operation_active_gauge_metric_.Record(
95+
curr_count, {{"Name", handle->context_name.value_or(handle->event_name)}});
9696
}
9797

9898
handle->end_or_execution_recorded = true;
@@ -135,13 +135,15 @@ void EventTracker::RecordExecution(const std::function<void()> &fn,
135135

136136
if (handle->emit_stats) {
137137
// Update event-specific stats.
138-
ray::stats::STATS_operation_run_time_ms.Record(
139-
execution_time_ns / 1000000, handle->context_name.value_or(handle->event_name));
140-
ray::stats::STATS_operation_active_count.Record(
141-
curr_count, handle->context_name.value_or(handle->event_name));
138+
operation_run_time_ms_histogram_metric_.Record(
139+
execution_time_ns / 1000000,
140+
{{"Name", handle->context_name.value_or(handle->event_name)}});
141+
operation_active_gauge_metric_.Record(
142+
curr_count, {{"Name", handle->context_name.value_or(handle->event_name)}});
142143
// Update global stats.
143-
ray::stats::STATS_operation_queue_time_ms.Record(
144-
queue_time_ns / 1000000, handle->context_name.value_or(handle->event_name));
144+
operation_queue_time_ms_histogram_metric_.Record(
145+
queue_time_ns / 1000000,
146+
{{"Name", handle->context_name.value_or(handle->event_name)}});
145147
}
146148

147149
{

src/ray/common/event_stats.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "absl/container/flat_hash_map.h"
2020
#include "absl/synchronization/mutex.h"
21+
#include "ray/common/metrics.h"
2122
#include "ray/common/ray_config.h"
2223
#include "ray/util/logging.h"
2324

@@ -132,14 +133,14 @@ class EventTracker {
132133
///
133134
/// \param fn The function to execute and instrument.
134135
/// \param handle An opaque stats handle returned by RecordStart().
135-
static void RecordExecution(const std::function<void()> &fn,
136-
std::shared_ptr<StatsHandle> handle);
136+
void RecordExecution(const std::function<void()> &fn,
137+
std::shared_ptr<StatsHandle> handle);
137138

138139
/// Records the end of an event. This is used in conjunction
139140
/// with RecordStart() to manually instrument an event.
140141
///
141142
/// \param handle An opaque stats handle returned by RecordStart().
142-
static void RecordEnd(std::shared_ptr<StatsHandle> handle);
143+
void RecordEnd(std::shared_ptr<StatsHandle> handle);
143144

144145
/// Returns a snapshot view of the global count, queueing, and execution statistics
145146
/// across all handlers.
@@ -188,4 +189,12 @@ class EventTracker {
188189

189190
/// Protects access to the per-handler post stats table.
190191
mutable absl::Mutex mutex_;
192+
193+
ray::stats::Count operation_count_metric_{ray::GetOperationCountCounterMetric()};
194+
ray::stats::Gauge operation_active_gauge_metric_{
195+
ray::GetOperationActiveCountGaugeMetric()};
196+
ray::stats::Histogram operation_run_time_ms_histogram_metric_{
197+
ray::GetOperationRunTimeMsHistogramMetric()};
198+
ray::stats::Histogram operation_queue_time_ms_histogram_metric_{
199+
ray::GetOperationQueueTimeMsHistogramMetric()};
191200
};

src/ray/common/metrics.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,51 @@ inline ray::stats::Histogram GetSchedulerPlacementTimeSHistogramMetric() {
8686
};
8787
}
8888

89+
inline ray::stats::Gauge GetIoContextEventLoopLagMsGaugeMetric() {
90+
return ray::stats::Gauge{
91+
/*name=*/"io_context_event_loop_lag_ms",
92+
/*description=*/"The latency of a task from post to execution",
93+
/*unit=*/"ms",
94+
/*tag_keys=*/{"Name"},
95+
};
96+
}
97+
98+
inline ray::stats::Count GetOperationCountCounterMetric() {
99+
return ray::stats::Count{
100+
/*name=*/"operation_count",
101+
/*description=*/"operation count",
102+
/*unit=*/"",
103+
/*tag_keys=*/{"Name"},
104+
};
105+
}
106+
107+
inline ray::stats::Histogram GetOperationRunTimeMsHistogramMetric() {
108+
return ray::stats::Histogram{
109+
/*name=*/"operation_run_time_ms",
110+
/*description=*/"operation execution time",
111+
/*unit=*/"ms",
112+
/*boundaries=*/{1, 10, 100, 1000, 10000},
113+
/*tag_keys=*/{"Name"},
114+
};
115+
}
116+
117+
inline ray::stats::Histogram GetOperationQueueTimeMsHistogramMetric() {
118+
return ray::stats::Histogram{
119+
/*name=*/"operation_queue_time_ms",
120+
/*description=*/"operation queuing time",
121+
/*unit=*/"ms",
122+
/*boundaries=*/{1, 10, 100, 1000, 10000},
123+
/*tag_keys=*/{"Name"},
124+
};
125+
}
126+
127+
inline ray::stats::Gauge GetOperationActiveCountGaugeMetric() {
128+
return ray::stats::Gauge{
129+
/*name=*/"operation_active_count",
130+
/*description=*/"active operation number",
131+
/*unit=*/"",
132+
/*tag_keys=*/{"Name"},
133+
};
134+
}
135+
89136
} // namespace ray

src/ray/core_worker/core_worker.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,10 +455,10 @@ CoreWorker::CoreWorker(
455455
periodical_runner_->RunFnPeriodically(
456456
[this] {
457457
RAY_LOG(INFO) << "Event stats:\n\n"
458-
<< io_service_.stats().StatsString() << "\n\n"
458+
<< io_service_.stats()->StatsString() << "\n\n"
459459
<< "-----------------\n"
460460
<< "Task execution event stats:\n"
461-
<< task_execution_service_.stats().StatsString() << "\n\n"
461+
<< task_execution_service_.stats()->StatsString() << "\n\n"
462462
<< "-----------------\n"
463463
<< "Task Event stats:\n"
464464
<< task_event_buffer_->DebugString() << "\n";

src/ray/core_worker/task_event_buffer.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,7 @@ std::string TaskEventBufferImpl::DebugString() {
10121012

10131013
auto stats = stats_counter_.GetAll();
10141014
ss << "\nIO Service Stats:\n";
1015-
ss << io_service_.stats().StatsString();
1015+
ss << io_service_.stats()->StatsString();
10161016
ss << "\nOther Stats:"
10171017
<< "\n\tgcs_grpc_in_progress:" << gcs_grpc_in_progress_
10181018
<< "\n\tevent_aggregator_grpc_in_progress:" << event_aggregator_grpc_in_progress_

src/ray/gcs/gcs_server.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -935,11 +935,11 @@ void GcsServer::PrintDebugState() const {
935935
RayConfig::instance().event_stats_print_interval_ms();
936936
if (event_stats_print_interval_ms != -1 && RayConfig::instance().event_stats()) {
937937
RAY_LOG(INFO) << "Main service Event stats:\n\n"
938-
<< io_context_provider_.GetDefaultIOContext().stats().StatsString()
938+
<< io_context_provider_.GetDefaultIOContext().stats()->StatsString()
939939
<< "\n\n";
940940
for (const auto &io_context : io_context_provider_.GetAllDedicatedIOContexts()) {
941941
RAY_LOG(INFO) << io_context->GetName() << " Event stats:\n\n"
942-
<< io_context->GetIoService().stats().StatsString() << "\n\n";
942+
<< io_context->GetIoService().stats()->StatsString() << "\n\n";
943943
}
944944
}
945945
}

0 commit comments

Comments
 (0)