1616
1717#include < algorithm>
1818#include < filesystem>
19+ #include < fstream>
1920#include < memory>
21+ #include < sstream>
2022#include < string>
23+ #include < type_traits>
2124#include < unordered_set>
2225#include < utility>
2326
@@ -40,10 +43,12 @@ CgroupManager::CgroupManager(std::string base_cgroup,
4043 node_cgroup_ + std::filesystem::path::preferred_separator + kSystemCgroupName ;
4144 system_leaf_cgroup_ =
4245 system_cgroup_ + std::filesystem::path::preferred_separator + kLeafCgroupName ;
43- application_cgroup_ =
44- node_cgroup_ + std::filesystem::path::preferred_separator + kApplicationCgroupName ;
45- application_leaf_cgroup_ =
46- application_cgroup_ + std::filesystem::path::preferred_separator + kLeafCgroupName ;
46+ user_cgroup_ =
47+ node_cgroup_ + std::filesystem::path::preferred_separator + kUserCgroupName ;
48+ workers_cgroup_ =
49+ user_cgroup_ + std::filesystem::path::preferred_separator + kWorkersCgroupName ;
50+ non_ray_cgroup_ =
51+ user_cgroup_ + std::filesystem::path::preferred_separator + kNonRayCgroupName ;
4752}
4853
4954CgroupManager::~CgroupManager () {
@@ -56,17 +61,19 @@ CgroupManager::CgroupManager(CgroupManager &&other)
5661 : node_cgroup_(std::move(other.node_cgroup_)),
5762 system_cgroup_ (std::move(other.system_cgroup_)),
5863 system_leaf_cgroup_(std::move(other.system_leaf_cgroup_)),
59- application_cgroup_(std::move(other.application_cgroup_)),
60- application_leaf_cgroup_(std::move(other.application_leaf_cgroup_)),
64+ user_cgroup_(std::move(other.user_cgroup_)),
65+ workers_cgroup_(std::move(other.workers_cgroup_)),
66+ non_ray_cgroup_(std::move(other.non_ray_cgroup_)),
6167 cleanup_operations_(std::move(other.cleanup_operations_)),
6268 cgroup_driver_(std::move(other.cgroup_driver_)) {}
6369
6470CgroupManager &CgroupManager::operator =(CgroupManager &&other) {
6571 node_cgroup_ = std::move (other.node_cgroup_ );
6672 system_cgroup_ = std::move (other.system_cgroup_ );
6773 system_leaf_cgroup_ = std::move (other.system_leaf_cgroup_ );
68- application_cgroup_ = std::move (other.application_cgroup_ );
69- application_leaf_cgroup_ = std::move (other.application_leaf_cgroup_ );
74+ user_cgroup_ = std::move (other.user_cgroup_ );
75+ workers_cgroup_ = std::move (other.workers_cgroup_ );
76+ non_ray_cgroup_ = std::move (other.non_ray_cgroup_ );
7077 cleanup_operations_ = std::move (other.cleanup_operations_ );
7178 cgroup_driver_ = std::move (other.cgroup_driver_ );
7279 return *this ;
@@ -202,17 +209,16 @@ Status CgroupManager::Initialize(int64_t system_reserved_cpu_weight,
202209 std::string supported_controllers =
203210 absl::StrCat (" [" , absl::StrJoin (supported_controllers_, " , " ), " ]" );
204211
205- // The cpu.weight is distributed between the system and application cgroups.
206- // The application cgroup gets whatever is leftover from the system cgroup.
207- int64_t application_cgroup_cpu_weight =
208- cpu_weight_constraint_.Max () - system_reserved_cpu_weight;
212+ int64_t user_cpu_weight = cpu_weight_constraint_.Max () - system_reserved_cpu_weight;
209213
210214 RAY_LOG (INFO) << absl::StrFormat (
211215 " Initializing CgroupManager at base cgroup at '%s'. Ray's cgroup "
212- " hierarchy will under the node cgroup at '%s'. The %s controllers will be "
213- " enabled. "
214- " The system cgroup at '%s' will have constraints [%s=%lld, %s=%lld]. "
215- " The application cgroup '%s' will have constraints [%s=%lld]." ,
216+ " hierarchy will under the node cgroup at '%s' with %s controllers enabled. "
217+ " The system cgroup at '%s' will have [memory] controllers enabled with "
218+ " [%s=%lld, %s=%lld] constraints. "
219+ " The user cgroup '%s' will have no controllers enabled with [%s=%lld] "
220+ " constraints. "
221+ " The user cgroup will contain the [%s, %s] cgroups." ,
216222 base_cgroup_,
217223 node_cgroup_,
218224 supported_controllers,
@@ -221,21 +227,23 @@ Status CgroupManager::Initialize(int64_t system_reserved_cpu_weight,
221227 system_reserved_cpu_weight,
222228 memory_min_constraint_.name_ ,
223229 system_reserved_memory_bytes,
224- application_cgroup_ ,
230+ user_cgroup_ ,
225231 cpu_weight_constraint_.name_ ,
226- application_cgroup_cpu_weight);
227-
228- // Create the cgroup heirarchy:
229- // base_cgroup_path (e.g. /sys/fs/cgroup)
230- // |
231- // ray_node_<node_id>
232- // | |
233- // system application
234- // | |
235- // leaf leaf
236- //
237- // There need to be two cgroups as leaf nodes because of the no
238- // internal processes constraint.
232+ user_cpu_weight,
233+ workers_cgroup_,
234+ non_ray_cgroup_);
235+
236+ // Create the cgroup hierarchy:
237+ // base_cgroup_path (e.g. /sys/fs/cgroup)
238+ // |
239+ // ray-node_<node_id>
240+ // | |
241+ // system user
242+ // | | |
243+ // leaf workers non-ray
244+
245+ // There need to be leaf cgroups because of the no internal processes
246+ // constraint.
239247 RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (node_cgroup_));
240248 RegisterDeleteCgroup (node_cgroup_);
241249
@@ -245,28 +253,41 @@ Status CgroupManager::Initialize(int64_t system_reserved_cpu_weight,
245253 RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (system_leaf_cgroup_));
246254 RegisterDeleteCgroup (system_leaf_cgroup_);
247255
248- RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (application_cgroup_ ));
249- RegisterDeleteCgroup (application_cgroup_ );
256+ RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (user_cgroup_ ));
257+ RegisterDeleteCgroup (user_cgroup_ );
250258
251- RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (application_leaf_cgroup_ ));
252- RegisterDeleteCgroup (application_leaf_cgroup_ );
259+ RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (workers_cgroup_ ));
260+ RegisterDeleteCgroup (workers_cgroup_ );
253261
254262 // Create the non-ray cgroup, the destination for processes moved out of the base_cgroup.
263+ RAY_RETURN_NOT_OK (cgroup_driver_->CreateCgroup (non_ray_cgroup_));
264+ RegisterDeleteCgroup (non_ray_cgroup_);
265+
266+ // Move all processes from the base_cgroup into the non-ray cgroup to make sure
255267 // that the no internal process constraint is not violated. This is relevant
256- // when the base_cgroup is not a root cgroup for the system. This is likely
257- // the case if Ray is running inside a container.
258- RAY_RETURN_NOT_OK (cgroup_driver_->MoveAllProcesses (base_cgroup_, system_leaf_cgroup_));
268+ // when the base_cgroup is not the OS's root cgroup. This is the case when
269+ // Ray is running inside a container.
270+ RAY_RETURN_NOT_OK (cgroup_driver_->MoveAllProcesses (base_cgroup_, non_ray_cgroup_));
271+ RegisterMoveAllProcesses (non_ray_cgroup_, base_cgroup_);
272+
273+ // NOTE: Since the raylet does not own the lifecycle of all system processes,
274+ // there's no guarantee that there are no pids in the system leaf cgroup.
275+ // Therefore, pids need to be migrated out of the system cgroup to delete it.
259276 RegisterMoveAllProcesses (system_leaf_cgroup_, base_cgroup_);
260277
261- for (const auto &ctrl : supported_controllers_) {
262- RAY_RETURN_NOT_OK (cgroup_driver_->EnableController (base_cgroup_, ctrl));
263- RegisterDisableController (base_cgroup_, ctrl);
264- RAY_RETURN_NOT_OK (cgroup_driver_->EnableController (node_cgroup_, ctrl));
265- RegisterDisableController (node_cgroup_, ctrl);
266- RAY_RETURN_NOT_OK (cgroup_driver_->EnableController (system_cgroup_, ctrl));
267- RegisterDisableController (system_cgroup_, ctrl);
268- RAY_RETURN_NOT_OK (cgroup_driver_->EnableController (application_cgroup_, ctrl));
269- RegisterDisableController (application_cgroup_, ctrl);
278+ std::array<const std::string *, 2 > cpu_controlled_cgroups{&base_cgroup_, &node_cgroup_};
279+ std::array<const std::string *, 3 > memory_controlled_cgroups{
280+ &base_cgroup_, &node_cgroup_, &system_cgroup_};
281+
282+ for (const std::string *cpu_controlled_cgroup : cpu_controlled_cgroups) {
283+ RAY_RETURN_NOT_OK (cgroup_driver_->EnableController (*cpu_controlled_cgroup, " cpu" ));
284+ RegisterDisableController (*cpu_controlled_cgroup, " cpu" );
285+ }
286+
287+ for (const std::string *memory_controlled_cgroup : memory_controlled_cgroups) {
288+ RAY_RETURN_NOT_OK (
289+ cgroup_driver_->EnableController (*memory_controlled_cgroup, " memory" ));
290+ RegisterDisableController (*memory_controlled_cgroup, " memory" );
270291 }
271292
272293 RAY_RETURN_NOT_OK (
@@ -283,12 +304,12 @@ Status CgroupManager::Initialize(int64_t system_reserved_cpu_weight,
283304 std::to_string (system_reserved_memory_bytes)));
284305 RegisterRemoveConstraint (system_cgroup_, memory_min_constraint_);
285306
286- RAY_RETURN_NOT_OK (
287- cgroup_driver_-> AddConstraint (application_cgroup_ ,
288- cpu_weight_constraint_.controller_ ,
289- cpu_weight_constraint_. name_ ,
290- std::to_string (application_cgroup_cpu_weight)) );
291- RegisterRemoveConstraint (application_cgroup_, cpu_weight_constraint_);
307+ RAY_RETURN_NOT_OK (cgroup_driver_-> AddConstraint (user_cgroup_,
308+ cpu_weight_constraint_. controller_ ,
309+ cpu_weight_constraint_.name_ ,
310+ std::to_string (user_cpu_weight)));
311+ RegisterRemoveConstraint (user_cgroup_, cpu_weight_constraint_ );
312+
292313 return Status::OK ();
293314}
294315
@@ -308,8 +329,8 @@ Status CgroupManager::AddProcessToCgroup(const std::string &cgroup,
308329 return s;
309330}
310331
311- Status CgroupManager::AddProcessToApplicationCgroup (const std::string &pid) {
312- return AddProcessToCgroup (application_leaf_cgroup_ , pid);
332+ Status CgroupManager::AddProcessToWorkersCgroup (const std::string &pid) {
333+ return AddProcessToCgroup (workers_cgroup_ , pid);
313334}
314335
315336Status CgroupManager::AddProcessToSystemCgroup (const std::string &pid) {
0 commit comments