Skip to content

Commit

Permalink
[core][gcs] Remove ByteSizeLong call from GcsTaskManager (#41108)
Browse files Browse the repository at this point in the history
We were tracking the size of stored task events via ByteSizeLong() calls, which showed high overhead. Since we already track the number of task attempts, we can use that count as a proxy for the size and thus drop the size tracking completely.
  • Loading branch information
rickyyx authored Nov 17, 2023
1 parent 8209893 commit 7a75d09
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 16 deletions.
8 changes: 0 additions & 8 deletions src/ray/gcs/gcs_server/gcs_task_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,7 @@ void GcsTaskManager::GcsTaskManagerStorage::UpdateExistingTaskAttempt(
}

// Update the task event.
stats_counter_.Decrement(kNumTaskEventsBytesStored, existing_task.ByteSizeLong());
existing_task.MergeFrom(task_events);
stats_counter_.Increment(kNumTaskEventsBytesStored, existing_task.ByteSizeLong());

// Move the task events around different gc priority list.
auto target_list_index = gc_policy_->GetTaskListPriority(existing_task);
Expand Down Expand Up @@ -182,7 +180,6 @@ GcsTaskManager::GcsTaskManagerStorage::AddNewTaskEvent(rpc::TaskEvents &&task_ev
const auto &added_task_events = loc->GetTaskEventsMutable();

// Stats tracking
stats_counter_.Increment(kNumTaskEventsBytesStored, added_task_events.ByteSizeLong());
stats_counter_.Increment(kNumTaskEventsStored);
// Bump the task counters by type.
if (added_task_events.has_task_info() && added_task_events.attempt_number() == 0) {
Expand Down Expand Up @@ -279,7 +276,6 @@ void GcsTaskManager::GcsTaskManagerStorage::RemoveTaskAttempt(
// Update the tracking
job_task_summary_[job_id].RecordProfileEventsDropped(NumProfileEvents(to_remove));
job_task_summary_[job_id].RecordTaskAttemptDropped(GetTaskAttempt(to_remove));
stats_counter_.Decrement(kNumTaskEventsBytesStored, to_remove.ByteSizeLong());
stats_counter_.Decrement(kNumTaskEventsStored);
stats_counter_.Increment(kTotalNumTaskAttemptsDropped);
stats_counter_.Increment(kTotalNumProfileTaskEventsDropped,
Expand Down Expand Up @@ -498,8 +494,6 @@ std::string GcsTaskManager::DebugString() {
<< "\n-Total num status task events dropped: "
<< counters[kTotalNumTaskAttemptsDropped] << "\n-Total num profile events dropped: "
<< counters[kTotalNumProfileTaskEventsDropped]
<< "\n-Total num bytes of task event stored: "
<< 1.0 * counters[kNumTaskEventsBytesStored] / 1024 / 1024 << "MiB"
<< "\n-Current num of task events stored: " << counters[kNumTaskEventsStored]
<< "\n-Total num of actor creation tasks: " << counters[kTotalNumActorCreationTask]
<< "\n-Total num of actor tasks: " << counters[kTotalNumActorTask]
Expand All @@ -521,8 +515,6 @@ void GcsTaskManager::RecordMetrics() {

ray::stats::STATS_gcs_task_manager_task_events_stored.Record(
counters[kNumTaskEventsStored]);
ray::stats::STATS_gcs_task_manager_task_events_stored_bytes.Record(
counters[kNumTaskEventsBytesStored]);

{
absl::MutexLock lock(&mutex_);
Expand Down
1 change: 0 additions & 1 deletion src/ray/gcs/gcs_server/gcs_task_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ enum GcsTaskManagerCounter {
kTotalNumTaskEventsReported,
kTotalNumTaskAttemptsDropped,
kTotalNumProfileTaskEventsDropped,
kNumTaskEventsBytesStored,
kNumTaskEventsStored,
kTotalNumActorCreationTask,
kTotalNumActorTask,
Expand Down
6 changes: 0 additions & 6 deletions src/ray/stats/metric_defs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,12 +349,6 @@ DEFINE_stats(gcs_task_manager_task_events_stored,
(),
ray::stats::GAUGE);

DEFINE_stats(gcs_task_manager_task_events_stored_bytes,
"Number of bytes of all task events stored in GCS.",
(),
(),
ray::stats::GAUGE);

/// Memory Manager
DEFINE_stats(
memory_manager_worker_eviction_total,
Expand Down
1 change: 0 additions & 1 deletion src/ray/stats/metric_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ DECLARE_stats(gcs_storage_operation_latency_ms);
DECLARE_stats(gcs_storage_operation_count);
DECLARE_stats(gcs_task_manager_task_events_dropped);
DECLARE_stats(gcs_task_manager_task_events_stored);
DECLARE_stats(gcs_task_manager_task_events_stored_bytes);
DECLARE_stats(gcs_task_manager_task_events_reported);

/// Object Store
Expand Down

0 comments on commit 7a75d09

Please sign in to comment.