-
Notifications
You must be signed in to change notification settings - Fork 356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix --preserve-fds, eliminate stray FD being passed into container #2893
Changes from all commits
08bd823
4f388a3
6dfbb77
32d5dde
90131b3
f302ea0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ use nix::fcntl::{open, OFlag}; | |
use nix::mount::{mount, umount2, MntFlags, MsFlags}; | ||
use nix::sched::{unshare, CloneFlags}; | ||
use nix::sys::stat::{mknod, Mode, SFlag}; | ||
use nix::unistd::{chown, chroot, fchdir, pivot_root, sethostname, Gid, Uid}; | ||
use nix::unistd::{chown, chroot, close, fchdir, pivot_root, sethostname, Gid, Uid}; | ||
use oci_spec::runtime::PosixRlimit; | ||
|
||
use super::{Result, Syscall, SyscallError}; | ||
|
@@ -232,11 +232,15 @@ impl Syscall for LinuxSyscall { | |
/// Function to set given path as root path inside process | ||
fn pivot_rootfs(&self, path: &Path) -> Result<()> { | ||
// open the path as directory and read only | ||
let newroot = | ||
open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty()).map_err(|errno| { | ||
tracing::error!(?errno, ?path, "failed to open the new root for pivot root"); | ||
errno | ||
})?; | ||
let newroot = open( | ||
path, | ||
OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC, | ||
Mode::empty(), | ||
) | ||
.map_err(|errno| { | ||
tracing::error!(?errno, ?path, "failed to open the new root for pivot root"); | ||
errno | ||
})?; | ||
|
||
// make the given path as the root directory for the container | ||
// see https://man7.org/linux/man-pages/man2/pivot_root.2.html, specially the notes | ||
|
@@ -279,6 +283,11 @@ impl Syscall for LinuxSyscall { | |
errno | ||
})?; | ||
|
||
close(newroot).map_err(|errno| { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this explicit close required? Does the |
||
tracing::error!(?errno, ?newroot, "failed to close new root directory"); | ||
errno | ||
})?; | ||
|
||
Ok(()) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
use std::fs; | ||
use std::os::fd::{AsRawFd, RawFd}; | ||
|
||
use anyhow::{anyhow, Context, Result}; | ||
use oci_spec::runtime::{ProcessBuilder, Spec, SpecBuilder}; | ||
use test_framework::{test_result, ConditionalTest, Test, TestGroup, TestResult}; | ||
|
||
use crate::utils::{is_runtime_runc, test_inside_container, CreateOptions}; | ||
|
||
fn create_spec() -> Result<Spec> { | ||
SpecBuilder::default() | ||
.process( | ||
ProcessBuilder::default() | ||
.args( | ||
["runtimetest", "fd_control"] | ||
.iter() | ||
.map(|s| s.to_string()) | ||
.collect::<Vec<String>>(), | ||
) | ||
.build()?, | ||
) | ||
.build() | ||
.context("failed to create spec") | ||
} | ||
|
||
fn open_devnull_no_cloexec() -> Result<(fs::File, RawFd)> { | ||
// Rust std by default sets cloexec, so we undo it | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
let devnull = fs::File::open("/dev/null")?; | ||
let devnull_fd = devnull.as_raw_fd(); | ||
let flags = nix::fcntl::fcntl(devnull_fd, nix::fcntl::FcntlArg::F_GETFD)?; | ||
let mut flags = nix::fcntl::FdFlag::from_bits_retain(flags); | ||
flags.remove(nix::fcntl::FdFlag::FD_CLOEXEC); | ||
nix::fcntl::fcntl(devnull_fd, nix::fcntl::FcntlArg::F_SETFD(flags))?; | ||
Ok((devnull, devnull_fd)) | ||
} | ||
|
||
// If not opening any other FDs, verify youki itself doesnt open anything that gets | ||
// leaked in if passing --preserve-fds with a large number | ||
// NOTE: this will also fail if the test harness itself starts leaking FDs | ||
fn only_stdio_test() -> TestResult { | ||
let spec = test_result!(create_spec()); | ||
test_inside_container( | ||
spec, | ||
&CreateOptions::default().with_extra_args(&["--preserve-fds".as_ref(), "100".as_ref()]), | ||
&|bundle_path| { | ||
fs::write(bundle_path.join("num-fds"), "0".as_bytes())?; | ||
Ok(()) | ||
}, | ||
) | ||
} | ||
|
||
// If we know we have an open FD without cloexec, it should be closed if preserve-fds | ||
// is 0 (the default) | ||
fn closes_fd_test() -> TestResult { | ||
// Open this before the setup function so it's kept alive for the container lifetime | ||
let (_devnull, _devnull_fd) = match open_devnull_no_cloexec() { | ||
Ok(v) => v, | ||
Err(e) => return TestResult::Failed(anyhow!("failed to open dev null: {}", e)), | ||
}; | ||
|
||
let spec = test_result!(create_spec()); | ||
test_inside_container( | ||
spec, | ||
&CreateOptions::default().with_extra_args(&["--preserve-fds".as_ref(), "0".as_ref()]), | ||
&|bundle_path| { | ||
fs::write(bundle_path.join("num-fds"), "0".as_bytes())?; | ||
Ok(()) | ||
}, | ||
) | ||
} | ||
|
||
// Given an open FD, verify it can be passed down with preserve-fds | ||
fn pass_single_fd_test() -> TestResult { | ||
// Open this before the setup function so it's kept alive for the container lifetime | ||
let (_devnull, devnull_fd) = match open_devnull_no_cloexec() { | ||
Ok(v) => v, | ||
Err(e) => return TestResult::Failed(anyhow!("failed to open dev null: {}", e)), | ||
}; | ||
|
||
let spec = test_result!(create_spec()); | ||
test_inside_container( | ||
spec, | ||
&CreateOptions::default().with_extra_args(&[ | ||
"--preserve-fds".as_ref(), | ||
(devnull_fd - 2).to_string().as_ref(), // relative to stdio | ||
]), | ||
&|bundle_path| { | ||
fs::write(bundle_path.join("num-fds"), "1".as_bytes())?; | ||
Ok(()) | ||
}, | ||
) | ||
} | ||
|
||
pub fn get_fd_control_test() -> TestGroup { | ||
let mut test_group = TestGroup::new("fd_control"); | ||
test_group.set_nonparallel(); // fds are process-wide state | ||
let test_only_stdio = ConditionalTest::new( | ||
"only_stdio", | ||
// runc errors if any of the N passed FDs via preserve-fd are not currently open | ||
Box::new(|| !is_runtime_runc()), | ||
Box::new(only_stdio_test), | ||
); | ||
let test_closes_fd = Test::new("closes_fd", Box::new(closes_fd_test)); | ||
let test_pass_single_fd = Test::new("pass_single_fd", Box::new(pass_single_fd_test)); | ||
// adding separately as one is conditional test and others are normal | ||
test_group.add(vec![Box::new(test_only_stdio)]); | ||
test_group.add(vec![ | ||
Box::new(test_closes_fd), | ||
Box::new(test_pass_single_fd), | ||
]); | ||
|
||
test_group | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
use std::env; | ||
use std::ffi::OsStr; | ||
use std::fs::{self, read_dir}; | ||
use std::os::linux::fs::MetadataExt; | ||
use std::os::unix::fs::{FileTypeExt, PermissionsExt}; | ||
|
@@ -775,3 +776,47 @@ pub fn validate_process_oom_score_adj(spec: &Spec) { | |
eprintln!("Unexpected oom_score_adj, expected: {expected_value} found: {actual_value}"); | ||
} | ||
} | ||
|
||
pub fn validate_fd_control(_spec: &Spec) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💯 |
||
// --preserve-fds does not get passed via the spec so we have to communicate information | ||
// via the root filesystem | ||
let expected_num_fds: usize = fs::read_to_string("/num-fds").unwrap().parse().unwrap(); | ||
|
||
let mut entries = vec![]; | ||
let stdio: &[&OsStr] = &["0".as_ref(), "1".as_ref(), "2".as_ref()]; | ||
for entry in fs::read_dir("/proc/self/fd").unwrap() { | ||
let entry = entry.unwrap(); | ||
let name = entry.file_name(); | ||
if stdio.contains(&name.as_os_str()) { | ||
// Ignore stdio | ||
continue; | ||
} | ||
entries.push((entry.path(), fs::read_link(entry.path()))) | ||
} | ||
|
||
// NOTE: we do this in a separate loop so we can filter out the dirfd used behind | ||
// the scenes in 'fs::read_dir'. It is important to *not* store the full DirEntry | ||
// type, as that keeps the dirfd open. | ||
let mut fd_details = vec![]; | ||
let mut found_dirfd = false; | ||
for (path, linkpath) in &entries { | ||
println!("found fd in container {} {:?}", path.display(), linkpath); | ||
// The difference between metadata.unwrap() and fs::metadata is that the latter | ||
// will now try to follow the symlink | ||
match fs::metadata(path) { | ||
Ok(m) => fd_details.push((path, linkpath, m)), | ||
Err(e) if e.kind() == std::io::ErrorKind::NotFound && !found_dirfd => { | ||
// Expected for the dirfd | ||
println!("(ignoring dirfd)"); | ||
found_dirfd = true | ||
} | ||
Err(e) => { | ||
eprintln!("unexpected error reading metadata: {}", e) | ||
} | ||
} | ||
} | ||
|
||
if fd_details.len() != expected_num_fds { | ||
eprintln!("mismatched fds inside container! {:?}", fd_details); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this revert is correct. Reading the security advisory, the action it calls out is to set
O_CLOEXEC
on all FDs other than stdio. The close_range
in the --preserve-fds
check in the init process should take care of this case.