From 299a0f884f96eaf44aac8796c1b3be3e4929031c Mon Sep 17 00:00:00 2001 From: zhilong Date: Tue, 30 Jul 2024 13:11:39 -0400 Subject: [PATCH 1/6] fix_file_detection Signed-off-by: zhilong --- src/ray/common/file_system_monitor.cc | 26 +++++++++++++++++++++++--- src/ray/common/file_system_monitor.h | 3 +++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/ray/common/file_system_monitor.cc b/src/ray/common/file_system_monitor.cc index 014596679327..e74e7f82280c 100644 --- a/src/ray/common/file_system_monitor.cc +++ b/src/ray/common/file_system_monitor.cc @@ -34,6 +34,9 @@ FileSystemMonitor::FileSystemMonitor(std::vector paths, io_context_.run(); }), runner_(io_context_) { + if (!paths_.empty()) { + InitializeInitialCapacity(paths_[0]); + } runner_.RunFnPeriodically([this] { over_capacity_ = CheckIfAnyPathOverCapacity(); }, monitor_interval_ms, "FileSystemMonitor.CheckIfAnyPathOverCapacity"); @@ -92,26 +95,43 @@ bool FileSystemMonitor::OverCapacityImpl( if (!space_info.has_value()) { return false; } + if (space_info->capacity <= 0) { RAY_LOG_EVERY_MS(ERROR, 60 * 1000) << path << " has no capacity, object creation will fail if spilling is required."; return true; } - if ((1 - 1.0f * space_info->available / space_info->capacity) < capacity_threshold_) { + uint64_t capacity = initial_capacity_.load(); + if (capacity == 0) { + capacity = space_info->capacity; + } + + if ((1 - 1.0f * space_info->available / capacity) < capacity_threshold_) { return false; } + // Convert bytes to GB + double available_gb = static_cast(space_info->available) / (1024 * 1024 * 1024); + double capacity_gb = static_cast(capacity) / (1024 * 1024 * 1024); + std::ostringstream ostr; ostr << path << " is over " << capacity_threshold_ * 100 - << "\% full, available space: " << space_info->available - << "; capacity: " << space_info->capacity + << "\% full, available space: " << available_gb << " GB" + << "; capacity: " << capacity_gb << " GB" << ". Object creation will fail if spilling is required."; RAY_EVENT_EVERY_MS(ERROR, "Out of Disk", 10 * 1000) << ostr.str(); RAY_LOG_EVERY_MS(ERROR, 10 * 1000) << ostr.str(); return true; } +void FileSystemMonitor::InitializeInitialCapacity(const std::string &path) { + auto space_info = Space(path); + if (space_info.has_value()) { + initial_capacity_.store(space_info->available); + } +} + std::vector ParseSpillingPaths(const std::string &spilling_config) { std::vector spilling_paths; diff --git a/src/ray/common/file_system_monitor.h b/src/ray/common/file_system_monitor.h index 644e28c62cd0..65c358ff35ef 100644 --- a/src/ray/common/file_system_monitor.h +++ b/src/ray/common/file_system_monitor.h @@ -63,11 +63,14 @@ class FileSystemMonitor { bool OverCapacityImpl(const std::string &path, const std::optional &info) const; + void InitializeInitialCapacity(const std::string &path); + private: FRIEND_TEST(FileSystemTest, TestOverCapacity); const std::vector paths_; const double capacity_threshold_; std::atomic over_capacity_; + std::atomic initial_capacity_{0}; instrumented_io_context io_context_; std::thread monitor_thread_; PeriodicalRunner runner_; From 407572e4991eb24a38fd9b648cae8f2b3239ba90 Mon Sep 17 00:00:00 2001 From: zhilong Date: Tue, 6 Aug 2024 12:02:15 -0400 Subject: [PATCH 2/6] fix Signed-off-by: zhilong --- src/ray/common/file_system_monitor.cc | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/ray/common/file_system_monitor.cc b/src/ray/common/file_system_monitor.cc index e74e7f82280c..5a1923609c0c 100644 --- a/src/ray/common/file_system_monitor.cc +++ b/src/ray/common/file_system_monitor.cc @@ -34,9 +34,6 @@ FileSystemMonitor::FileSystemMonitor(std::vector paths, io_context_.run(); }), runner_(io_context_) { - if (!paths_.empty()) { - InitializeInitialCapacity(paths_[0]); - } runner_.RunFnPeriodically([this] { over_capacity_ = CheckIfAnyPathOverCapacity(); }, monitor_interval_ms, "FileSystemMonitor.CheckIfAnyPathOverCapacity"); @@ -95,25 +92,19 @@ bool FileSystemMonitor::OverCapacityImpl( if (!space_info.has_value()) { return false; } - if (space_info->capacity <= 0) { RAY_LOG_EVERY_MS(ERROR, 60 * 1000) << path << " has no capacity, object creation will fail if spilling is required."; return true; } - uint64_t capacity = initial_capacity_.load(); - if (capacity == 0) { - capacity = space_info->capacity; - } - - if ((1 - 1.0f * space_info->available / capacity) < capacity_threshold_) { + if ((1 - 1.0f * space_info->available / space_info->capacity) < capacity_threshold_) { return false; } // Convert bytes to GB double available_gb = static_cast(space_info->available) / (1024 * 1024 * 1024); - double capacity_gb = static_cast(capacity) / (1024 * 1024 * 1024); + double capacity_gb = static_cast(space_info->capacity) / (1024 * 1024 * 1024); std::ostringstream ostr; ostr << path << " is over " << capacity_threshold_ * 100 @@ -125,13 +116,6 @@ bool FileSystemMonitor::OverCapacityImpl( return true; } -void FileSystemMonitor::InitializeInitialCapacity(const std::string &path) { - auto space_info = Space(path); - if (space_info.has_value()) { - initial_capacity_.store(space_info->available); - } -} - std::vector ParseSpillingPaths(const std::string &spilling_config) { std::vector spilling_paths; @@ -159,4 +143,4 @@ std::vector ParseSpillingPaths(const std::string &spilling_config) } return spilling_paths; } -} // namespace ray +} // namespace ray \ No newline at end of file From c29f3782092faa1948eac5e2ab786e3f6b559117 Mon Sep 17 00:00:00 2001 From: zhilong Date: Tue, 6 Aug 2024 17:40:59 -0400 Subject: [PATCH 3/6] fix Signed-off-by: zhilong --- src/ray/common/file_system_monitor.cc | 2 +- src/ray/common/file_system_monitor.h | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ray/common/file_system_monitor.cc b/src/ray/common/file_system_monitor.cc index 5a1923609c0c..5d96347922b3 100644 --- a/src/ray/common/file_system_monitor.cc +++ b/src/ray/common/file_system_monitor.cc @@ -143,4 +143,4 @@ std::vector ParseSpillingPaths(const std::string &spilling_config) } return spilling_paths; } -} // namespace ray \ No newline at end of file +} // namespace ray diff --git a/src/ray/common/file_system_monitor.h b/src/ray/common/file_system_monitor.h index 65c358ff35ef..60ce23fa4f97 100644 --- a/src/ray/common/file_system_monitor.h +++ b/src/ray/common/file_system_monitor.h @@ -63,18 +63,15 @@ class FileSystemMonitor { bool OverCapacityImpl(const std::string &path, const std::optional &info) const; - void InitializeInitialCapacity(const std::string &path); - private: FRIEND_TEST(FileSystemTest, TestOverCapacity); const std::vector paths_; const double capacity_threshold_; std::atomic over_capacity_; - std::atomic initial_capacity_{0}; instrumented_io_context io_context_; std::thread monitor_thread_; PeriodicalRunner runner_; }; std::vector ParseSpillingPaths(const std::string &spilling_config); -} // namespace ray +} // namespace ray \ No newline at end of file From 156610a6e4d006b889332cfce8be71b6a8d9c64f Mon Sep 17 00:00:00 2001 From: zhilong Date: Tue, 6 Aug 2024 17:41:37 -0400 Subject: [PATCH 4/6] fix Signed-off-by: zhilong --- src/ray/common/file_system_monitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ray/common/file_system_monitor.h b/src/ray/common/file_system_monitor.h index 60ce23fa4f97..644e28c62cd0 100644 --- a/src/ray/common/file_system_monitor.h +++ b/src/ray/common/file_system_monitor.h @@ -74,4 +74,4 @@ class FileSystemMonitor { }; std::vector ParseSpillingPaths(const std::string &spilling_config); -} // namespace ray \ No newline at end of file +} // namespace ray From 867cae14c8f5afedc794504bb0dd4c17ab742627 Mon Sep 17 00:00:00 2001 From: zhilong <121425509+Bye-legumes@users.noreply.github.com> Date: Tue, 29 Oct 2024 13:30:23 -0400 Subject: [PATCH 5/6] Update src/ray/common/file_system_monitor.cc Co-authored-by: Jiajun Yao Signed-off-by: zhilong <121425509+Bye-legumes@users.noreply.github.com> --- src/ray/common/file_system_monitor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ray/common/file_system_monitor.cc b/src/ray/common/file_system_monitor.cc index 5d96347922b3..1c92b84bf2cb 100644 --- a/src/ray/common/file_system_monitor.cc +++ b/src/ray/common/file_system_monitor.cc @@ -103,7 +103,7 @@ bool FileSystemMonitor::OverCapacityImpl( } // Convert bytes to GB - double available_gb = static_cast(space_info->available) / (1024 * 1024 * 1024); + const double available_gb = static_cast(space_info->available) / (1024 * 1024 * 1024); double capacity_gb = static_cast(space_info->capacity) / (1024 * 1024 * 1024); std::ostringstream ostr; From f4227e665bfbe8cda2e082acd672456954507ccb Mon Sep 17 00:00:00 2001 From: zhilong Date: Tue, 29 Oct 2024 13:45:55 -0400 Subject: [PATCH 6/6] fix Signed-off-by: zhilong --- src/ray/common/file_system_monitor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ray/common/file_system_monitor.cc b/src/ray/common/file_system_monitor.cc index 1c92b84bf2cb..4a5ee7fd413f 100644 --- a/src/ray/common/file_system_monitor.cc +++ b/src/ray/common/file_system_monitor.cc @@ -103,7 +103,8 @@ bool FileSystemMonitor::OverCapacityImpl( } // Convert bytes to GB - const double available_gb = static_cast(space_info->available) / (1024 * 1024 * 1024); + const double available_gb = + static_cast(space_info->available) / (1024 * 1024 * 1024); double capacity_gb = static_cast(space_info->capacity) / (1024 * 1024 * 1024); std::ostringstream ostr;