From fe052461e10e052afa3e63418fc50e7881e53dac Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Wed, 6 Mar 2024 12:42:32 +0100 Subject: [PATCH 1/3] Less syscalls for the `copy_file_range` probe If it's obvious from the actual syscall results themselves that the syscall is supported or unsupported, don't do an extra syscall with an invalid file descriptor. CC #122052 --- std/src/sys/pal/unix/kernel_copy.rs | 71 +++++++++++++++++++---------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/std/src/sys/pal/unix/kernel_copy.rs b/std/src/sys/pal/unix/kernel_copy.rs index 18acd5ecccd5c..60f4e55755d73 100644 --- a/std/src/sys/pal/unix/kernel_copy.rs +++ b/std/src/sys/pal/unix/kernel_copy.rs @@ -560,6 +560,12 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> // We store the availability in a global to avoid unnecessary syscalls static HAS_COPY_FILE_RANGE: AtomicU8 = AtomicU8::new(NOT_PROBED); + let mut have_probed = match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) { + NOT_PROBED => false, + UNAVAILABLE => return CopyResult::Fallback(0), + _ => true, + }; + syscall! { fn copy_file_range( fd_in: libc::c_int, @@ -571,26 +577,6 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> ) -> libc::ssize_t } - match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) { - NOT_PROBED => { - // EPERM can indicate seccomp filters or an immutable file. - // To distinguish these cases we probe with invalid file descriptors which should result in EBADF if the syscall is supported - // and some other error (ENOSYS or EPERM) if it's not available - let result = unsafe { - cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0)) - }; - - if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(EBADF))) { - HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); - } else { - HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); - return CopyResult::Fallback(0); - } - } - UNAVAILABLE => return CopyResult::Fallback(0), - _ => {} - }; - let mut written = 0u64; while written < max_len { let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64); @@ -604,6 +590,11 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> cvt(copy_file_range(reader, ptr::null_mut(), writer, ptr::null_mut(), bytes_to_copy, 0)) }; + if !have_probed && copy_result.is_ok() { + have_probed = true; + HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); + } + match copy_result { Ok(0) if written == 0 => { // fallback to work around several kernel bugs where copy_file_range will fail to @@ -616,10 +607,44 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> Ok(0) => return CopyResult::Ended(written), // reached EOF Ok(ret) => written += ret as u64, Err(err) => { - return match err.raw_os_error() { + let raw_os_error = match err.raw_os_error() { + Some(raw) => raw, + _ => return CopyResult::Error(err, written), + }; + return match raw_os_error { // when file offset + max_length > u64::MAX - Some(EOVERFLOW) => CopyResult::Fallback(written), - Some(ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF) if written == 0 => { + EOVERFLOW => CopyResult::Fallback(written), + ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF if written == 0 => { + if !have_probed { + if raw_os_error == ENOSYS { + HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); + } else { + // EPERM can indicate seccomp filters or an + // immutable file. To distinguish these cases + // we probe with invalid file descriptors which + // should result in EBADF if the syscall is + // supported and some other error (ENOSYS or + // EPERM) if it's not available. + let result = unsafe { + cvt(copy_file_range( + INVALID_FD, + ptr::null_mut(), + INVALID_FD, + ptr::null_mut(), + 1, + 0, + )) + }; + + if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(EBADF))) + { + HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); + } else { + HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); + } + } + } + // Try fallback io::copy if either: // - Kernel version is < 4.5 (ENOSYS¹) // - Files are mounted on different fs (EXDEV) From 30c876cbfb4818dab696affe8c4807a1a771cfb8 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Wed, 6 Mar 2024 14:37:25 +0100 Subject: [PATCH 2/3] Be stricter with `copy_file_range` probe results --- std/src/sys/pal/unix/kernel_copy.rs | 68 +++++++++++++++-------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/std/src/sys/pal/unix/kernel_copy.rs b/std/src/sys/pal/unix/kernel_copy.rs index 60f4e55755d73..1db86bdb180f9 100644 --- a/std/src/sys/pal/unix/kernel_copy.rs +++ b/std/src/sys/pal/unix/kernel_copy.rs @@ -607,42 +607,44 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> Ok(0) => return CopyResult::Ended(written), // reached EOF Ok(ret) => written += ret as u64, Err(err) => { - let raw_os_error = match err.raw_os_error() { - Some(raw) => raw, - _ => return CopyResult::Error(err, written), - }; - return match raw_os_error { + return match err.raw_os_error() { // when file offset + max_length > u64::MAX - EOVERFLOW => CopyResult::Fallback(written), - ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF if written == 0 => { + Some(EOVERFLOW) => CopyResult::Fallback(written), + Some(raw_os_error @ (ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF)) + if written == 0 => + { if !have_probed { - if raw_os_error == ENOSYS { - HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); - } else { - // EPERM can indicate seccomp filters or an - // immutable file. To distinguish these cases - // we probe with invalid file descriptors which - // should result in EBADF if the syscall is - // supported and some other error (ENOSYS or - // EPERM) if it's not available. - let result = unsafe { - cvt(copy_file_range( - INVALID_FD, - ptr::null_mut(), - INVALID_FD, - ptr::null_mut(), - 1, - 0, - )) - }; - - if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(EBADF))) - { - HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); - } else { - HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); + let available = match raw_os_error { + EPERM => { + // EPERM can indicate seccomp filters or an + // immutable file. To distinguish these + // cases we probe with invalid file + // descriptors which should result in EBADF + // if the syscall is supported and EPERM or + // ENOSYS if it's not available. + match unsafe { + cvt(copy_file_range( + INVALID_FD, + ptr::null_mut(), + INVALID_FD, + ptr::null_mut(), + 1, + 0, + )) + .map_err(|e| e.raw_os_error()) + } { + Err(Some(EPERM | ENOSYS)) => UNAVAILABLE, + Err(Some(EBADF)) => AVAILABLE, + Ok(_) => panic!("unexpected copy_file_range probe success"), + // Treat other errors as the syscall + // being unavailable. + Err(_) => UNAVAILABLE, + } } - } + ENOSYS => UNAVAILABLE, + _ => AVAILABLE, + }; + HAS_COPY_FILE_RANGE.store(available, Ordering::Relaxed); } // Try fallback io::copy if either: From 7a6ddb35da680caf0ffe100f7575ff2ff6538204 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Sat, 27 Apr 2024 18:44:30 +0200 Subject: [PATCH 3/3] Lift the probe code of `copy_file_range` into a function --- std/src/sys/pal/unix/kernel_copy.rs | 60 +++++++++++++++-------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/std/src/sys/pal/unix/kernel_copy.rs b/std/src/sys/pal/unix/kernel_copy.rs index 1db86bdb180f9..cd38b7c07e2b1 100644 --- a/std/src/sys/pal/unix/kernel_copy.rs +++ b/std/src/sys/pal/unix/kernel_copy.rs @@ -577,6 +577,23 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> ) -> libc::ssize_t } + fn probe_copy_file_range_support() -> u8 { + // In some cases, we cannot determine availability from the first + // `copy_file_range` call. In this case, we probe with an invalid file + // descriptor so that the results are easily interpretable. + match unsafe { + cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0)) + .map_err(|e| e.raw_os_error()) + } { + Err(Some(EPERM | ENOSYS)) => UNAVAILABLE, + Err(Some(EBADF)) => AVAILABLE, + Ok(_) => panic!("unexpected copy_file_range probe success"), + // Treat other errors as the syscall + // being unavailable. + Err(_) => UNAVAILABLE, + } + } + let mut written = 0u64; while written < max_len { let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64); @@ -614,35 +631,20 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> if written == 0 => { if !have_probed { - let available = match raw_os_error { - EPERM => { - // EPERM can indicate seccomp filters or an - // immutable file. To distinguish these - // cases we probe with invalid file - // descriptors which should result in EBADF - // if the syscall is supported and EPERM or - // ENOSYS if it's not available. - match unsafe { - cvt(copy_file_range( - INVALID_FD, - ptr::null_mut(), - INVALID_FD, - ptr::null_mut(), - 1, - 0, - )) - .map_err(|e| e.raw_os_error()) - } { - Err(Some(EPERM | ENOSYS)) => UNAVAILABLE, - Err(Some(EBADF)) => AVAILABLE, - Ok(_) => panic!("unexpected copy_file_range probe success"), - // Treat other errors as the syscall - // being unavailable. - Err(_) => UNAVAILABLE, - } - } - ENOSYS => UNAVAILABLE, - _ => AVAILABLE, + let available = if matches!(raw_os_error, ENOSYS | EOPNOTSUPP | EPERM) { + // EPERM can indicate seccomp filters or an + // immutable file. To distinguish these + // cases we probe with invalid file + // descriptors which should result in EBADF + // if the syscall is supported and EPERM or + // ENOSYS if it's not available. + // + // For EOPNOTSUPP, see below. In the case of + // ENOSYS, we try to cover for faulty FUSE + // drivers. + probe_copy_file_range_support() + } else { + AVAILABLE }; HAS_COPY_FILE_RANGE.store(available, Ordering::Relaxed); }