Skip to content

Commit

Permalink
Merge pull request from GHSA-c827-hfw6-qwvm
Browse files Browse the repository at this point in the history
* Fix `rustix::fs::Dir` to avoid unbounded buffer growth.

Fix `Dir`'s buffer size computation to avoid resizing past a fixed
upper limit. This prevents it from growing without bound, such as in
the case of `Dir::rewind` for repeated iterations with the same `Dir`.

* Don't let `Dir` continue to try to iterate after a failure.

* Handle `io::Errno::INTR` gracefully.

* Write a more detailed comment on the buffer growth policy.

* Also mention that no buffer can ever be big enough for everything.

* Add tests against over-allocation & stuck iterator

* Rm `dir_iterator_does_not_overallocate` unit test in favour of docs

* Extend `test_dir` to cover `rewind`.

* Consistently handle directory removal as ending the stream.

libc implementations of directory iteration handle directory removal
by just ending the stream. In the linux_raw backend, this looks like
`ENOENT` from `getdents64`, so change the code to check for `ENOENT`
and end the stream.

This requires changing the `dir_iterator_does_not_get_stuck_on_io_error`
test to no longer expect a failure, so it's now renamed to
`dir_iterator_handles_dir_removal`.

To test the error case, add a new `dir_iterator_handles_io_errors`
test which uses `dup2` to induce an error, in both the linux_raw and
libc backends.

This exposes the fact that the libc `Dir` implementation was also
assuming that users would stop iterating after hitting a failure, so
add a `any_errors` flag to the libc backend as well.

* Add a test for removing the directory after doing `read_from`.

* In the libc backend, handle `ENOENT` when opening ".".

---------

Co-authored-by: cyqsimon <28627918+cyqsimon@users.noreply.github.com>
  • Loading branch information
sunfishcode and cyqsimon authored Oct 12, 2023
1 parent 4bb2f4b commit 31fd98c
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 26 deletions.
86 changes: 74 additions & 12 deletions src/backend/libc/fs/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@ use core::ptr::NonNull;
use libc_errno::{errno, set_errno, Errno};

/// `DIR*`
#[repr(transparent)]
pub struct Dir(NonNull<c::DIR>);
pub struct Dir {
/// The `libc` `DIR` pointer.
libc_dir: NonNull<c::DIR>,

/// Have we seen any errors in this iteration?
any_errors: bool,
}

impl Dir {
/// Construct a `Dir` that reads entries from the given directory
Expand All @@ -43,20 +48,35 @@ impl Dir {

#[inline]
fn _read_from(fd: BorrowedFd<'_>) -> io::Result<Self> {
let mut any_errors = false;

// Given an arbitrary `OwnedFd`, it's impossible to know whether the
// user holds a `dup`'d copy which could continue to modify the
// file description state, which would cause Undefined Behavior after
// our call to `fdopendir`. To prevent this, we obtain an independent
// `OwnedFd`.
let flags = fcntl_getfl(fd)?;
let fd_for_dir = openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty())?;
let fd_for_dir = match openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty()) {
Ok(fd) => fd,
Err(io::Errno::NOENT) => {
// If "." doesn't exist, it means the directory was removed.
// We treat that as iterating through a directory with no
// entries.
any_errors = true;
crate::io::dup(fd)?
}
Err(err) => return Err(err),
};

let raw = owned_fd(fd_for_dir);
unsafe {
let libc_dir = c::fdopendir(raw);

if let Some(libc_dir) = NonNull::new(libc_dir) {
Ok(Self(libc_dir))
Ok(Self {
libc_dir,
any_errors,
})
} else {
let err = io::Errno::last_os_error();
let _ = c::close(raw);
Expand All @@ -68,20 +88,27 @@ impl Dir {
/// `rewinddir(self)`
#[inline]
pub fn rewind(&mut self) {
unsafe { c::rewinddir(self.0.as_ptr()) }
self.any_errors = false;
unsafe { c::rewinddir(self.libc_dir.as_ptr()) }
}

/// `readdir(self)`, where `None` means the end of the directory.
pub fn read(&mut self) -> Option<io::Result<DirEntry>> {
// If we've seen errors, don't continue to try to read anyting further.
if self.any_errors {
return None;
}

set_errno(Errno(0));
let dirent_ptr = unsafe { libc_readdir(self.0.as_ptr()) };
let dirent_ptr = unsafe { libc_readdir(self.libc_dir.as_ptr()) };
if dirent_ptr.is_null() {
let curr_errno = errno().0;
if curr_errno == 0 {
// We successfully reached the end of the stream.
None
} else {
// `errno` is unknown or non-zero, so an error occurred.
self.any_errors = true;
Some(Err(io::Errno(curr_errno)))
}
} else {
Expand Down Expand Up @@ -120,7 +147,7 @@ impl Dir {
/// `fstat(self)`
#[inline]
pub fn stat(&self) -> io::Result<Stat> {
fstat(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) })
fstat(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) })
}

/// `fstatfs(self)`
Expand All @@ -134,14 +161,14 @@ impl Dir {
)))]
#[inline]
pub fn statfs(&self) -> io::Result<StatFs> {
fstatfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) })
fstatfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) })
}

/// `fstatvfs(self)`
#[cfg(not(any(solarish, target_os = "haiku", target_os = "redox", target_os = "wasi")))]
#[inline]
pub fn statvfs(&self) -> io::Result<StatVfs> {
fstatvfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) })
fstatvfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) })
}

/// `fchdir(self)`
Expand All @@ -150,7 +177,7 @@ impl Dir {
#[cfg_attr(doc_cfg, doc(cfg(feature = "process")))]
#[inline]
pub fn chdir(&self) -> io::Result<()> {
fchdir(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) })
fchdir(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) })
}
}

Expand All @@ -163,7 +190,7 @@ unsafe impl Send for Dir {}
impl Drop for Dir {
#[inline]
fn drop(&mut self) {
unsafe { c::closedir(self.0.as_ptr()) };
unsafe { c::closedir(self.libc_dir.as_ptr()) };
}
}

Expand All @@ -179,7 +206,7 @@ impl Iterator for Dir {
impl fmt::Debug for Dir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Dir")
.field("fd", unsafe { &c::dirfd(self.0.as_ptr()) })
.field("fd", unsafe { &c::dirfd(self.libc_dir.as_ptr()) })
.finish()
}
}
Expand Down Expand Up @@ -293,3 +320,38 @@ fn check_dirent_layout(dirent: &c::dirent) {
}
);
}

#[test]
fn dir_iterator_handles_io_errors() {
// create a dir, keep the FD, then delete the dir
let tmp = tempfile::tempdir().unwrap();
let fd = crate::fs::openat(
crate::fs::CWD,
tmp.path(),
crate::fs::OFlags::RDONLY | crate::fs::OFlags::CLOEXEC,
crate::fs::Mode::empty(),
)
.unwrap();

let file_fd = crate::fs::openat(
&fd,
tmp.path().join("test.txt"),
crate::fs::OFlags::WRONLY | crate::fs::OFlags::CREATE,
crate::fs::Mode::RWXU,
)
.unwrap();

let mut dir = Dir::read_from(&fd).unwrap();

// Reach inside the `Dir` and replace its directory with a file, which
// will cause the subsequent `readdir` to fail.
unsafe {
let raw_fd = c::dirfd(dir.libc_dir.as_ptr());
let mut owned_fd: crate::fd::OwnedFd = crate::fd::FromRawFd::from_raw_fd(raw_fd);
crate::io::dup2(&file_fd, &mut owned_fd).unwrap();
core::mem::forget(owned_fd);
}

assert!(matches!(dir.next(), Some(Err(_))));
assert!(matches!(dir.next(), None));
}
95 changes: 82 additions & 13 deletions src/backend/linux_raw/fs/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,17 @@ pub struct Dir {
/// The `OwnedFd` that we read directory entries from.
fd: OwnedFd,

/// Have we seen any errors in this iteration?
any_errors: bool,

/// Should we rewind the stream on the next iteration?
rewind: bool,

/// The buffer for `linux_dirent64` entries.
buf: Vec<u8>,

/// Where we are in the buffer.
pos: usize,
next: Option<u64>,
}

impl Dir {
Expand All @@ -38,25 +46,39 @@ impl Dir {

Ok(Self {
fd: fd_for_dir,
any_errors: false,
rewind: false,
buf: Vec::new(),
pos: 0,
next: None,
})
}

/// `rewinddir(self)`
#[inline]
pub fn rewind(&mut self) {
self.any_errors = false;
self.rewind = true;
self.pos = self.buf.len();
self.next = Some(0);
}

/// `readdir(self)`, where `None` means the end of the directory.
pub fn read(&mut self) -> Option<io::Result<DirEntry>> {
if let Some(next) = self.next.take() {
match crate::backend::fs::syscalls::_seek(self.fd.as_fd(), next as i64, SEEK_SET) {
// If we've seen errors, don't continue to try to read anyting further.
if self.any_errors {
return None;
}

// If a rewind was requested, seek to the beginning.
if self.rewind {
self.rewind = false;
match io::retry_on_intr(|| {
crate::backend::fs::syscalls::_seek(self.fd.as_fd(), 0, SEEK_SET)
}) {
Ok(_) => (),
Err(err) => return Some(Err(err)),
Err(err) => {
self.any_errors = true;
return Some(Err(err));
}
}
}

Expand All @@ -78,7 +100,7 @@ impl Dir {
if self.buf.len() - self.pos < size_of::<linux_dirent64>() {
match self.read_more()? {
Ok(()) => (),
Err(e) => return Some(Err(e)),
Err(err) => return Some(Err(err)),
}
}

Expand Down Expand Up @@ -136,14 +158,31 @@ impl Dir {
}

fn read_more(&mut self) -> Option<io::Result<()>> {
let og_len = self.buf.len();
// Capacity increment currently chosen by wild guess.
self.buf
.resize(self.buf.capacity() + 32 * size_of::<linux_dirent64>(), 0);
let nread = match crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf) {
// The first few times we're called, we allocate a relatively small
// buffer, because many directories are small. If we're called more,
// use progressively larger allocations, up to a fixed maximum.
//
// The specific sizes and policy here have not been tuned in detail yet
// and may need to be adjusted. In doing so, we should be careful to
// avoid unbounded buffer growth. This buffer only exists to share the
// cost of a `getdents` call over many entries, so if it gets too big,
// cache and heap usage will outweigh the benefit. And ultimately,
// directories can contain more entries than we can allocate contiguous
// memory for, so we'll always need to cap the size at some point.
if self.buf.len() < 1024 * size_of::<linux_dirent64>() {
self.buf.reserve(32 * size_of::<linux_dirent64>());
}
self.buf.resize(self.buf.capacity(), 0);
let nread = match io::retry_on_intr(|| {
crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf)
}) {
Ok(nread) => nread,
Err(io::Errno::NOENT) => {
self.any_errors = true;
return None;
}
Err(err) => {
self.buf.resize(og_len, 0);
self.any_errors = true;
return Some(Err(err));
}
};
Expand Down Expand Up @@ -225,3 +264,33 @@ impl DirEntry {
self.d_ino
}
}

#[test]
fn dir_iterator_handles_io_errors() {
// create a dir, keep the FD, then delete the dir
let tmp = tempfile::tempdir().unwrap();
let fd = crate::fs::openat(
crate::fs::CWD,
tmp.path(),
crate::fs::OFlags::RDONLY | crate::fs::OFlags::CLOEXEC,
crate::fs::Mode::empty(),
)
.unwrap();

let file_fd = crate::fs::openat(
&fd,
tmp.path().join("test.txt"),
crate::fs::OFlags::WRONLY | crate::fs::OFlags::CREATE,
crate::fs::Mode::RWXU,
)
.unwrap();

let mut dir = Dir::read_from(&fd).unwrap();

// Reach inside the `Dir` and replace its directory with a file, which
// will cause the subsequent `getdents64` to fail.
crate::io::dup2(&file_fd, &mut dir.fd).unwrap();

assert!(matches!(dir.next(), Some(Err(_))));
assert!(matches!(dir.next(), None));
}
Loading

0 comments on commit 31fd98c

Please sign in to comment.