Skip to content

Commit 0ce453e

Browse files
israbbaniedoakes
authored andcommitted
[core] (cgroups 2/n) adding integration tests for the cgroup sysfs driver. (ray-project#55063)
Signed-off-by: irabbani <irabbani@anyscale.com> Signed-off-by: Ibrahim Rabbani <israbbani@gmail.com> Signed-off-by: Ibrahim Rabbani <irabbani@anyscale.com> Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com> Signed-off-by: sampan <sampan@anyscale.com>
1 parent 598594a commit 0ce453e

13 files changed

+1319
-208
lines changed

.buildkite/core.rayci.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -291,15 +291,14 @@ steps:
291291
- "3.11"
292292
- "3.12"
293293
- "3.13"
294-
295-
# cpp tests
296294
- label: ":ray: core: cgroup tests"
297295
tags: core_cpp
298296
instance_type: medium
299297
commands:
300-
- bazel run //ci/ray_ci:test_in_docker -- //:all //src/... core --only-tags=cgroup --build-type cgroup
301-
--privileged --cache-test-results
302-
298+
- bazel run //ci/ray_ci:test_in_docker -- //:all //src/ray/common/cgroup2/tests/... core --build-type clang --cache-test-results
299+
- docker run --privileged -i --rm --volume /tmp/artifacts:/artifact-mount --shm-size=2.5gb
300+
"$${RAYCI_WORK_REPO}":"$${RAYCI_BUILD_ID}"-corebuild /bin/bash
301+
"./src/ray/common/cgroup2/integration_tests/sysfs_cgroup_driver_integration_test_entrypoint.sh"
303302
- label: ":ray: core: cpp tests"
304303
tags: core_cpp
305304
instance_type: medium

src/ray/common/cgroup2/BUILD.bazel

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ ray_cc_library(
55
hdrs = [
66
"cgroup_driver_interface.h",
77
],
8-
tags = [
9-
"no_windows",
8+
target_compatible_with = [
9+
"@platforms//os:linux",
1010
],
1111
deps = [
1212
"//src/ray/common:status",
@@ -20,8 +20,8 @@ ray_cc_library(
2020
hdrs = [
2121
"sysfs_cgroup_driver.h",
2222
],
23-
tags = [
24-
"no_windows",
23+
target_compatible_with = [
24+
"@platforms//os:linux",
2525
],
2626
deps = [
2727
":cgroup_driver_interface",
@@ -31,3 +31,18 @@ ray_cc_library(
3131
"@com_google_absl//absl/strings",
3232
],
3333
)
34+
35+
ray_cc_library(
36+
name = "cgroup_test_utils",
37+
srcs = ["cgroup_test_utils.cc"],
38+
hdrs = ["cgroup_test_utils.h"],
39+
target_compatible_with = [
40+
"@platforms//os:linux",
41+
],
42+
deps = [
43+
"//src/ray/common:id",
44+
"//src/ray/common:status",
45+
"//src/ray/common:status_or",
46+
"@com_google_absl//absl/strings:str_format",
47+
],
48+
)
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "ray/common/cgroup2/cgroup_test_utils.h"
16+
17+
#include <errno.h>
18+
#include <fcntl.h>
19+
#include <linux/sched.h>
20+
#include <poll.h>
21+
#include <signal.h>
22+
#include <stdint.h>
23+
#include <sys/stat.h>
24+
#include <sys/syscall.h>
25+
#include <sys/types.h>
26+
#include <sys/wait.h>
27+
#include <unistd.h>
28+
29+
#include <algorithm>
30+
#include <cstdlib>
31+
#include <filesystem>
32+
#include <fstream>
33+
#include <initializer_list>
34+
#include <memory>
35+
#include <stdexcept>
36+
#include <string>
37+
#include <system_error>
38+
#include <utility>
39+
40+
#include "absl/strings/str_format.h"
41+
#include "ray/common/id.h"
42+
#include "ray/common/status.h"
43+
#include "ray/common/status_or.h"
44+
#include "ray/util/logging.h"
45+
46+
ray::StatusOr<std::unique_ptr<TempCgroupDirectory>> TempCgroupDirectory::Create(
47+
const std::string &base_path, mode_t mode) {
48+
std::string random_name = ray::UniqueID::FromRandom().Hex();
49+
std::string name = random_name.substr(0, std::min<size_t>(6, random_name.size()));
50+
std::string path = base_path + std::filesystem::path::preferred_separator + name;
51+
if (mkdir(path.c_str(), mode) == -1) {
52+
return ray::Status::IOError(
53+
absl::StrFormat("Failed to create cgroup directory at path %s.\n"
54+
"Cgroup tests expect tmpfs and cgroupv2 to be mounted "
55+
"and only run on Linux.\n"
56+
"Error: %s",
57+
path,
58+
strerror(errno)));
59+
}
60+
auto output = std::make_unique<TempCgroupDirectory>(std::move(name), std::move(path));
61+
return output;
62+
}
63+
64+
TempCgroupDirectory::~TempCgroupDirectory() noexcept(false) {
65+
RAY_CHECK(rmdir(path_.c_str()) != -1) << absl::StrFormat(
66+
"Failed to delete a cgroup directory at %s with error %s. Please manually "
67+
"delete it with rmdir.",
68+
path_,
69+
strerror(errno));
70+
}
71+
72+
ray::StatusOr<std::unique_ptr<TempDirectory>> TempDirectory::Create() {
73+
std::string path = "/tmp/XXXXXX";
74+
char *ret = mkdtemp(path.data());
75+
if (ret == nullptr) {
76+
return ray::Status::Invalid(
77+
absl::StrFormat("Failed to create a temp directory on tmpfs with error %s."
78+
"Cgroup tests expect tmpfs to be mounted and only run on Linux.",
79+
strerror(errno)));
80+
}
81+
std::unique_ptr<TempDirectory> temp_dir =
82+
std::make_unique<TempDirectory>(std::move(path));
83+
return ray::StatusOr<std::unique_ptr<TempDirectory>>(std::move(temp_dir));
84+
}
85+
86+
TempDirectory::~TempDirectory() {
87+
std::error_code error_code;
88+
RAY_CHECK(std::filesystem::remove_all(path_, error_code)) << absl::StrFormat(
89+
"Failed to delete temp directory at %s with error %s. Please manually "
90+
"delete it with rmdir.",
91+
path_,
92+
error_code.message());
93+
}
94+
95+
/**
96+
Note: clone3 supports creating a process inside a cgroup instead of creating
97+
and then moving. However, clone3 does not have a glibc wrapper and
98+
must be called directly using syscall syscall (see man 2 syscall).
99+
This function needs linux kernel >= 5.7 to use the CLONE_INTO_CGROUP flag.
100+
*/
101+
#ifdef CLONE_INTO_CGROUP
102+
ray::StatusOr<std::pair<pid_t, int>> StartChildProcessInCgroup(
103+
const std::string &cgroup_path) {
104+
int cgroup_fd = open(cgroup_path.c_str(), O_RDONLY);
105+
if (cgroup_fd == -1) {
106+
return ray::Status::InvalidArgument(
107+
absl::StrFormat("Unable to open fd for cgroup at %s with error %s.",
108+
cgroup_path,
109+
strerror(errno)));
110+
}
111+
112+
// Will be set by clone3 if a child process is successfully created.
113+
pid_t child_pidfd = -1;
114+
115+
clone_args cl_args = {};
116+
cl_args.flags = CLONE_PIDFD | CLONE_INTO_CGROUP;
117+
cl_args.cgroup = cgroup_fd;
118+
119+
// Can be used both as a pid and as a fd.
120+
cl_args.pidfd = ((__u64)((uintptr_t)(&child_pidfd)));
121+
122+
int child_pid = -1;
123+
124+
if ((child_pid = syscall(__NR_clone3, &cl_args, sizeof(struct clone_args))) == -1) {
125+
close(cgroup_fd);
126+
return ray::Status::Invalid(
127+
absl::StrFormat("Failed to clone process into cgroup %s with error %s.",
128+
cgroup_path,
129+
strerror(errno)));
130+
}
131+
132+
if (child_pid == 0) {
133+
// Child process will wait for parent to unblock it.
134+
pause();
135+
_exit(0);
136+
}
137+
138+
// Parent process will continue here.
139+
close(cgroup_fd);
140+
return std::make_pair(child_pid, static_cast<int>(child_pidfd));
141+
}
142+
#else
143+
// Fallback for older kernels. Uses fork/exec instead.
144+
ray::StatusOr<std::pair<pid_t, int>> StartChildProcessInCgroup(
145+
const std::string &cgroup_path) {
146+
int new_pid = fork();
147+
if (new_pid == -1) {
148+
return ray::Status::Invalid(
149+
absl::StrFormat("Failed to fork process with error %s.", strerror(errno)));
150+
}
151+
152+
if (new_pid == 0) {
153+
// Child process will pause and wait for parent to terminate and reap it.
154+
pause();
155+
_exit(0);
156+
}
157+
158+
std::string cgroup_proc_file_path = cgroup_path + "/cgroup.procs";
159+
160+
// Parent process has to move the process into a cgroup.
161+
int cgroup_fd = open(cgroup_proc_file_path.c_str(), O_RDWR);
162+
163+
if (cgroup_fd == -1) {
164+
return ray::Status::Invalid(
165+
absl::StrFormat("Failed to open cgroup procs file at path %s with error %s.",
166+
cgroup_proc_file_path,
167+
strerror(errno)));
168+
}
169+
170+
std::string pid_to_write = std::to_string(new_pid);
171+
172+
if (write(cgroup_fd, pid_to_write.c_str(), pid_to_write.size()) == -1) {
173+
// Best effort killing of the child process because we couldn't move it
174+
// into the cgroup.
175+
kill(SIGKILL, new_pid);
176+
close(cgroup_fd);
177+
return ray::Status::Invalid(
178+
absl::StrFormat("Failed to write pid %i to cgroup procs file %s with error %s.",
179+
new_pid,
180+
cgroup_proc_file_path,
181+
strerror(errno)));
182+
}
183+
184+
close(cgroup_fd);
185+
186+
int child_pidfd = static_cast<int>(syscall(SYS_pidfd_open, new_pid, 0));
187+
if (child_pidfd == -1) {
188+
// Best effort killing of the child process because we couldn't create
189+
// a pidfd from the process.
190+
kill(SIGKILL, new_pid);
191+
close(cgroup_fd);
192+
return ray::Status::Invalid(
193+
absl::StrFormat("Failed to create process fd for pid %i with error %s.",
194+
new_pid,
195+
strerror(errno)));
196+
}
197+
return std::make_pair(new_pid, child_pidfd);
198+
}
199+
#endif
200+
201+
ray::Status TerminateChildProcessAndWaitForTimeout(pid_t pid, int fd, int timeout_ms) {
202+
if (kill(pid, SIGKILL) == -1) {
203+
return ray::Status::InvalidArgument(absl::StrFormat(
204+
"Failed to send SIGTERM to pid: %i with error %s.", pid, strerror(errno)));
205+
}
206+
struct pollfd poll_fd = {
207+
.fd = fd,
208+
.events = POLLIN,
209+
};
210+
211+
int poll_status = poll(&poll_fd, 1, timeout_ms);
212+
if (poll_status == -1) {
213+
return ray::Status::InvalidArgument(
214+
absl::StrFormat("Failed to poll process pid: %i, fd: %i with error %s. Process "
215+
"was not killed. Kill it manually to prevent a leak.",
216+
pid,
217+
fd,
218+
strerror(errno)));
219+
}
220+
if (poll_status == 0) {
221+
return ray::Status::Invalid(
222+
absl::StrFormat("Process pid: %i, fd: %i was not killed within the timeout of "
223+
"%ims. Kill it manually to prevent a leak.",
224+
pid,
225+
fd,
226+
timeout_ms));
227+
}
228+
siginfo_t dummy = {0};
229+
int wait_id_status = waitid(P_PID, static_cast<id_t>(fd), &dummy, WEXITED);
230+
if (wait_id_status == -1) {
231+
if (errno != ECHILD)
232+
return ray::Status::Invalid(
233+
absl::StrFormat("Failed to wait for process pid: %i, fd: %i with error %s. "
234+
"Process was not reaped, but "
235+
"it will be reaped by init after program exits.",
236+
pid,
237+
fd,
238+
strerror(errno)));
239+
};
240+
return ray::Status::OK();
241+
}
242+
243+
TempFile::TempFile(std::string path) {
244+
path_ = path;
245+
fd_ = open(path_.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); // NOLINT
246+
RAY_CHECK(fd_ != -1) << absl::StrFormat(
247+
"Failed to create a temp file at path %s with error %s. Cgroup tests expect "
248+
"tmpfs to be mounted and only run on Linux.",
249+
path_,
250+
strerror(errno));
251+
file_output_stream_ = std::ofstream(path_, std::ios::trunc);
252+
RAY_CHECK(file_output_stream_.is_open()) << absl::StrFormat(
253+
"Failed to open file %s on tmpfs with error %s", path_, strerror(errno));
254+
}
255+
256+
TempFile::TempFile() {
257+
fd_ = mkstemp(path_.data()); // NOLINT
258+
if (fd_ == -1) {
259+
throw std::runtime_error(
260+
"Failed to create a temp file. Cgroup tests expect tmpfs to be "
261+
"mounted "
262+
"and only run on Linux");
263+
}
264+
file_output_stream_ = std::ofstream(path_, std::ios::trunc);
265+
RAY_CHECK(file_output_stream_.is_open())
266+
<< absl::StrFormat("Could not open temporary file at path %s.", path_);
267+
}
268+
269+
TempFile::~TempFile() {
270+
RAY_CHECK(close(fd_) != -1) << absl::StrFormat(
271+
"Failed to close file descriptor with error %s.", strerror(errno));
272+
file_output_stream_.close();
273+
RAY_CHECK(unlink(path_.c_str()) != -1)
274+
<< absl::StrFormat("Failed to unlink temporary file at path %s with error %s.",
275+
path_,
276+
strerror(errno));
277+
}
278+
279+
void TempFile::AppendLine(const std::string &line) {
280+
file_output_stream_ << line;
281+
file_output_stream_.flush();
282+
// All current callers treat this is as a fatal error so this is a RAY_CHECK
283+
// instead of returning a Status.
284+
RAY_CHECK(file_output_stream_.good())
285+
<< absl::StrFormat("Failed to write to temporary file at path %s.", path_);
286+
}

0 commit comments

Comments
 (0)