From e601dcf19df901a08d846c3e2bf57687dfc3aa4e Mon Sep 17 00:00:00 2001 From: Alex Saveau Date: Mon, 12 Jun 2023 10:05:30 -0700 Subject: [PATCH] Improve perf of all buffer consuming APIs (#502) Signed-off-by: Alex Saveau --- src/backend/libc/fs/syscalls.rs | 6 +++- src/backend/libc/process/syscalls.rs | 2 +- src/backend/libc/termios/syscalls.rs | 2 +- src/backend/linux_raw/fs/syscalls.rs | 6 +++- src/backend/linux_raw/process/syscalls.rs | 2 +- src/backend/linux_raw/termios/syscalls.rs | 19 +++++++----- src/fs/at.rs | 38 ++++++++++++++++------- src/process/chdir.rs | 31 +++++++++++------- src/termios/tty.rs | 29 +++++++++++------ 9 files changed, 91 insertions(+), 44 deletions(-) diff --git a/src/backend/libc/fs/syscalls.rs b/src/backend/libc/fs/syscalls.rs index 7360cdf71..52eab8917 100644 --- a/src/backend/libc/fs/syscalls.rs +++ b/src/backend/libc/fs/syscalls.rs @@ -220,7 +220,11 @@ pub(crate) fn readlink(path: &CStr, buf: &mut [u8]) -> io::Result { #[cfg(not(target_os = "redox"))] #[inline] -pub(crate) fn readlinkat(dirfd: BorrowedFd<'_>, path: &CStr, buf: &mut [u8]) -> io::Result { +pub(crate) fn readlinkat( + dirfd: BorrowedFd<'_>, + path: &CStr, + buf: &mut [MaybeUninit], +) -> io::Result { unsafe { ret_usize(c::readlinkat( borrowed_fd(dirfd), diff --git a/src/backend/libc/process/syscalls.rs b/src/backend/libc/process/syscalls.rs index 8ddd3ed86..abbb8ba14 100644 --- a/src/backend/libc/process/syscalls.rs +++ b/src/backend/libc/process/syscalls.rs @@ -54,7 +54,7 @@ pub(crate) fn chroot(path: &CStr) -> io::Result<()> { #[cfg(feature = "fs")] #[cfg(not(target_os = "wasi"))] -pub(crate) fn getcwd(buf: &mut [u8]) -> io::Result<()> { +pub(crate) fn getcwd(buf: &mut [MaybeUninit]) -> io::Result<()> { unsafe { ret_discarded_char_ptr(c::getcwd(buf.as_mut_ptr().cast(), buf.len())) } } diff --git a/src/backend/libc/termios/syscalls.rs b/src/backend/libc/termios/syscalls.rs index 98a59070d..769fbd7ec 100644 --- a/src/backend/libc/termios/syscalls.rs +++ b/src/backend/libc/termios/syscalls.rs @@ -210,7 +210,7 @@ pub(crate) fn isatty(fd: BorrowedFd<'_>) -> bool { #[cfg(feature = "procfs")] #[cfg(not(any(target_os = "fuchsia", target_os = "wasi")))] -pub(crate) fn ttyname(dirfd: BorrowedFd<'_>, buf: &mut [u8]) -> io::Result { +pub(crate) fn ttyname(dirfd: BorrowedFd<'_>, buf: &mut [MaybeUninit]) -> io::Result { unsafe { // `ttyname_r` returns its error status rather than using `errno`. match c::ttyname_r(borrowed_fd(dirfd), buf.as_mut_ptr().cast(), buf.len()) { diff --git a/src/backend/linux_raw/fs/syscalls.rs b/src/backend/linux_raw/fs/syscalls.rs index 30e7bb0d4..4dc2f3506 100644 --- a/src/backend/linux_raw/fs/syscalls.rs +++ b/src/backend/linux_raw/fs/syscalls.rs @@ -861,7 +861,11 @@ pub(crate) fn readlink(path: &CStr, buf: &mut [u8]) -> io::Result { } #[inline] -pub(crate) fn readlinkat(dirfd: BorrowedFd<'_>, path: &CStr, buf: &mut [u8]) -> io::Result { +pub(crate) fn readlinkat( + dirfd: BorrowedFd<'_>, + path: &CStr, + buf: &mut [MaybeUninit], +) -> io::Result { let (buf_addr_mut, buf_len) = slice_mut(buf); unsafe { ret_usize(syscall!( diff --git a/src/backend/linux_raw/process/syscalls.rs b/src/backend/linux_raw/process/syscalls.rs index 753925419..0925e3a3e 100644 --- a/src/backend/linux_raw/process/syscalls.rs +++ b/src/backend/linux_raw/process/syscalls.rs @@ -56,7 +56,7 @@ pub(crate) fn chroot(filename: &CStr) -> io::Result<()> { #[cfg(feature = "fs")] #[inline] -pub(crate) fn getcwd(buf: &mut [u8]) -> io::Result { +pub(crate) fn getcwd(buf: &mut [MaybeUninit]) -> io::Result { let (buf_addr_mut, buf_len) = slice_mut(buf); unsafe { ret_usize(syscall!(__NR_getcwd, buf_addr_mut, buf_len)) } } diff --git a/src/backend/linux_raw/termios/syscalls.rs b/src/backend/linux_raw/termios/syscalls.rs index 74c176cff..56f8fbab2 100644 --- a/src/backend/linux_raw/termios/syscalls.rs +++ b/src/backend/linux_raw/termios/syscalls.rs @@ -273,7 +273,8 @@ pub(crate) fn isatty(fd: BorrowedFd<'_>) -> bool { } #[cfg(feature = "procfs")] -pub(crate) fn ttyname(fd: BorrowedFd<'_>, buf: &mut [u8]) -> io::Result { +#[allow(unsafe_code)] +pub(crate) fn ttyname(fd: BorrowedFd<'_>, buf: &mut [MaybeUninit]) -> io::Result { let fd_stat = crate::backend::fs::syscalls::fstat(fd)?; // Quick check: if `fd` isn't a character device, it's not a tty. @@ -300,14 +301,18 @@ pub(crate) fn ttyname(fd: BorrowedFd<'_>, buf: &mut [u8]) -> io::Result { if r == buf.len() { return Err(io::Errno::RANGE); } - buf[r] = b'\0'; + // SAFETY: readlinkat returns the number of bytes placed in the buffer + buf[r].write(b'\0'); // Check that the path we read refers to the same file as `fd`. - let path = CStr::from_bytes_with_nul(&buf[..=r]).unwrap(); - - let path_stat = crate::backend::fs::syscalls::stat(path)?; - if path_stat.st_dev != fd_stat.st_dev || path_stat.st_ino != fd_stat.st_ino { - return Err(io::Errno::NODEV); + { + // SAFETY: We just wrote the NUL byte above + let path = unsafe { CStr::from_ptr(buf.as_ptr().cast()) }; + + let path_stat = crate::backend::fs::syscalls::stat(path)?; + if path_stat.st_dev != fd_stat.st_dev || path_stat.st_ino != fd_stat.st_ino { + return Err(io::Errno::NODEV); + } } Ok(r) diff --git a/src/fs/at.rs b/src/fs/at.rs index 1bb4f3672..0928a1cca 100644 --- a/src/fs/at.rs +++ b/src/fs/at.rs @@ -64,7 +64,7 @@ pub fn openat( /// `readlinkat(fd, path)`—Reads the contents of a symlink. /// -/// If `reuse` is non-empty, reuse its buffer to store the result if possible. +/// If `reuse` already has available capacity, reuse it if possible. /// /// # References /// - [POSIX] @@ -81,24 +81,38 @@ pub fn readlinkat>>( path.into_with_c_str(|path| _readlinkat(dirfd.as_fd(), path, reuse.into())) } +#[allow(unsafe_code)] fn _readlinkat(dirfd: BorrowedFd<'_>, path: &CStr, mut buffer: Vec) -> io::Result { - // This code would benefit from having a better way to read into - // uninitialized memory, but that requires `unsafe`. buffer.clear(); buffer.reserve(SMALL_PATH_BUFFER_SIZE); - buffer.resize(buffer.capacity(), 0_u8); loop { - let nread = backend::fs::syscalls::readlinkat(dirfd.as_fd(), path, &mut buffer)?; + let nread = + backend::fs::syscalls::readlinkat(dirfd.as_fd(), path, buffer.spare_capacity_mut())?; - let nread = nread as usize; - assert!(nread <= buffer.len()); - if nread < buffer.len() { - buffer.resize(nread, 0_u8); - return Ok(CString::new(buffer).unwrap()); + debug_assert!(nread <= buffer.capacity()); + if nread < buffer.capacity() { + // SAFETY from the man page: + // "On success, these calls return the number of bytes placed in buf." + unsafe { + buffer.set_len(nread); + } + + // SAFETY: + // - "readlink places the contents of the symbolic link pathname in the buffer buf" + // - POSIX definition 3.271 Pathname (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271 + // "A string that is used to identify a file." + // POSIX definition 3.375: String (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_375) + // "A contiguous sequence of bytes terminated by and including the first null byte." + // - "readlink does not append a terminating null byte to buf." + // + // Thus, there will be no NUL bytes in the string. + unsafe { + return Ok(CString::from_vec_unchecked(buffer)); + } } - buffer.reserve(1); // use `Vec` reallocation strategy to grow capacity exponentially - buffer.resize(buffer.capacity(), 0_u8); + + buffer.reserve(buffer.capacity() + 1); // use `Vec` reallocation strategy to grow capacity exponentially } } diff --git a/src/process/chdir.rs b/src/process/chdir.rs index d1202c3a9..5253064ac 100644 --- a/src/process/chdir.rs +++ b/src/process/chdir.rs @@ -4,7 +4,7 @@ use crate::backend::fd::AsFd; use crate::{backend, io}; #[cfg(feature = "fs")] use { - crate::ffi::CString, + crate::ffi::{CStr, CString}, crate::path::{self, SMALL_PATH_BUFFER_SIZE}, alloc::vec::Vec, }; @@ -40,7 +40,7 @@ pub fn fchdir(fd: Fd) -> io::Result<()> { /// `getCWD`—Return the current working directory. /// -/// If `reuse` is non-empty, reuse its buffer to store the result if possible. +/// If `reuse` already has available capacity, reuse it if possible. /// /// # References /// - [POSIX] @@ -57,23 +57,32 @@ pub fn getcwd>>(reuse: B) -> io::Result { } #[cfg(feature = "fs")] +#[allow(unsafe_code)] fn _getcwd(mut buffer: Vec) -> io::Result { - // This code would benefit from having a better way to read into - // uninitialized memory, but that requires `unsafe`. buffer.clear(); buffer.reserve(SMALL_PATH_BUFFER_SIZE); - buffer.resize(buffer.capacity(), 0_u8); loop { - match backend::process::syscalls::getcwd(&mut buffer) { + match backend::process::syscalls::getcwd(buffer.spare_capacity_mut()) { Err(io::Errno::RANGE) => { - buffer.reserve(1); // use `Vec` reallocation strategy to grow capacity exponentially - buffer.resize(buffer.capacity(), 0_u8); + buffer.reserve(buffer.capacity() + 1); // use `Vec` reallocation strategy to grow capacity exponentially } Ok(_) => { - let len = buffer.iter().position(|x| *x == b'\0').unwrap(); - buffer.resize(len, 0_u8); - return Ok(CString::new(buffer).unwrap()); + // SAFETY: + // - "These functions return a null-terminated string" + // - POSIX definition 3.375: String (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_375) + // "A contiguous sequence of bytes terminated by and including the first null byte." + // + // Thus, there will be a single NUL byte at the end of the string. + unsafe { + buffer.set_len( + CStr::from_ptr(buffer.as_ptr().cast()) + .to_bytes_with_nul() + .len(), + ); + + return Ok(CString::from_vec_with_nul_unchecked(buffer)); + } } Err(errno) => return Err(errno), } diff --git a/src/termios/tty.rs b/src/termios/tty.rs index 0fcf1d5f2..504783b55 100644 --- a/src/termios/tty.rs +++ b/src/termios/tty.rs @@ -24,7 +24,7 @@ pub fn isatty(fd: Fd) -> bool { /// `ttyname_r(fd)` /// -/// If `reuse` is non-empty, reuse its buffer to store the result if possible. +/// If `reuse` already has available capacity, reuse it if possible. /// /// # References /// - [POSIX] @@ -43,22 +43,33 @@ pub fn ttyname>>(dirfd: Fd, reuse: B) -> io::Result, mut buffer: Vec) -> io::Result { - // This code would benefit from having a better way to read into - // uninitialized memory, but that requires `unsafe`. buffer.clear(); buffer.reserve(SMALL_PATH_BUFFER_SIZE); - buffer.resize(buffer.capacity(), 0_u8); loop { - match backend::termios::syscalls::ttyname(dirfd, &mut buffer) { + match backend::termios::syscalls::ttyname(dirfd, buffer.spare_capacity_mut()) { Err(io::Errno::RANGE) => { - buffer.reserve(1); // use `Vec` reallocation strategy to grow capacity exponentially - buffer.resize(buffer.capacity(), 0_u8); + buffer.reserve(buffer.capacity() + 1); // use `Vec` reallocation strategy to grow capacity exponentially } Ok(len) => { - buffer.resize(len, 0_u8); - return Ok(CString::new(buffer).unwrap()); + // SAFETY: assume the backend returns the length of the string + unsafe { + buffer.set_len(len); + } + + // SAFETY: + // - "ttyname_r stores this pathname in the buffer buf" + // - POSIX definition 3.271 Pathname (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271 + // "A string that is used to identify a file." + // POSIX definition 3.375: String (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_375) + // "A contiguous sequence of bytes terminated by and including the first null byte." + // + // Thus, there will be a single NUL byte at the end of the string. + unsafe { + return Ok(CString::from_vec_with_nul_unchecked(buffer)); + } } Err(errno) => return Err(errno), }