From c96c30aeedd753b4e7f4eaa51f7589fed09f40a1 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 3 Apr 2023 21:09:53 +0200 Subject: [PATCH 1/2] test that modifications to the source don't become visible after io::copy --- library/std/src/sys/unix/kernel_copy/tests.rs | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/library/std/src/sys/unix/kernel_copy/tests.rs b/library/std/src/sys/unix/kernel_copy/tests.rs index 3fe849e23e2e6..a524270e3fb85 100644 --- a/library/std/src/sys/unix/kernel_copy/tests.rs +++ b/library/std/src/sys/unix/kernel_copy/tests.rs @@ -83,6 +83,48 @@ fn copies_append_mode_sink() -> Result<()> { Ok(()) } +#[test] +fn dont_splice_pipes_from_files() -> Result<()> { + // splicing to a pipe and then modifying the source could lead to changes + // becoming visible in an unexpected order. + + use crate::io::SeekFrom; + use crate::os::unix::fs::FileExt; + use crate::process::{ChildStdin, ChildStdout}; + use crate::sys_common::FromInner; + + let (read_end, write_end) = crate::sys::pipe::anon_pipe()?; + + let mut read_end = ChildStdout::from_inner(read_end); + let mut write_end = ChildStdin::from_inner(write_end); + + let tmp_path = tmpdir(); + let file = tmp_path.join("to_be_modified"); + let mut file = + crate::fs::OpenOptions::new().create_new(true).read(true).write(true).open(file)?; + + const SZ: usize = libc::PIPE_BUF as usize; + + // put data in page cache + let mut buf: [u8; SZ] = [0x01; SZ]; + file.write_all(&buf).unwrap(); + + // copy page into pipe + file.seek(SeekFrom::Start(0)).unwrap(); + assert!(io::copy(&mut file, &mut write_end).unwrap() == SZ as u64); + + // modify file + buf[0] = 0x02; + file.write_at(&buf, 0).unwrap(); + + // read from pipe + read_end.read_exact(buf.as_mut_slice()).unwrap(); + + assert_eq!(buf[0], 0x01, "data in pipe should reflect the original, not later modifications"); + + Ok(()) +} + #[bench] fn bench_file_to_file_copy(b: &mut test::Bencher) { const BYTES: usize = 128 * 1024; From 171ccb54f06a168f10249b6cbfc22b1cb66ee125 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 20 Feb 2023 20:56:50 +0100 Subject: [PATCH 2/2] avoid zero-copy ops for File->Pipe and File->Socket in io::copy --- library/std/src/sys/unix/kernel_copy.rs | 62 ++++++++++++++++++------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/library/std/src/sys/unix/kernel_copy.rs b/library/std/src/sys/unix/kernel_copy.rs index 73b9bef7e2ac9..16c8e0c0ebfc5 100644 --- a/library/std/src/sys/unix/kernel_copy.rs +++ b/library/std/src/sys/unix/kernel_copy.rs @@ -17,11 +17,9 @@ //! Once it has obtained all necessary pieces and brought any wrapper types into a state where they //! can be safely bypassed it will attempt to use the `copy_file_range(2)`, //! `sendfile(2)` or `splice(2)` syscalls to move data directly between file descriptors. -//! Since those syscalls have requirements that cannot be fully checked in advance and -//! gathering additional information about file descriptors would require additional syscalls -//! anyway it simply attempts to use them one after another (guided by inaccurate hints) to -//! figure out which one works and falls back to the generic read-write copy loop if none of them -//! does. +//! Since those syscalls have requirements that cannot be fully checked in advance it attempts +//! to use them one after another (guided by hints) to figure out which one works and +//! falls back to the generic read-write copy loop if none of them does. //! Once a working syscall is found for a pair of file descriptors it will be called in a loop //! until the copy operation is completed. //! @@ -84,14 +82,10 @@ pub(crate) fn copy_spec( /// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred /// type may be wrong. enum FdMeta { - /// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata - /// because it is cheaper than probing all possible syscalls (reader side) Metadata(Metadata), Socket, Pipe, - /// We don't have any metadata, e.g. because the original type was `File` which can represent - /// any `FileType` and we did not query the metadata either since it did not seem beneficial - /// (writer side) + /// We don't have any metadata because the stat syscall failed NoneObtained, } @@ -131,6 +125,39 @@ impl FdMeta { } } +/// Returns true either if changes made to the source after a sendfile/splice call won't become +/// visible in the sink or the source has explicitly opted into such behavior (e.g. by splicing +/// a file into a pipe, the pipe being the source in this case). +/// +/// This will prevent File -> Pipe and File -> Socket splicing/sendfile optimizations to uphold +/// the Read/Write API semantics of io::copy. +/// +/// Note: This is not 100% airtight, the caller can use the RawFd conversion methods to turn a +/// regular file into a TcpSocket which will be treated as a socket here without checking. +fn safe_kernel_copy(source: &FdMeta, sink: &FdMeta) -> bool { + match (source, sink) { + // Data arriving from a socket is safe because the sender can't modify the socket buffer. + // Data arriving from a pipe is safe(-ish) because either the sender *copied* + // the bytes into the pipe OR explicitly performed an operation that enables zero-copy, + // thus promising not to modify the data later. + (FdMeta::Socket, _) => true, + (FdMeta::Pipe, _) => true, + (FdMeta::Metadata(meta), _) + if meta.file_type().is_fifo() || meta.file_type().is_socket() => + { + true + } + // Data going into non-pipes/non-sockets is safe because the "later changes may become visible" issue + // only happens for pages sitting in send buffers or pipes. + (_, FdMeta::Metadata(meta)) + if !meta.file_type().is_fifo() && !meta.file_type().is_socket() => + { + true + } + _ => false, + } +} + struct CopyParams(FdMeta, Option); struct Copier<'a, 'b, R: Read + ?Sized, W: Write + ?Sized> { @@ -186,7 +213,8 @@ impl SpecCopy for Copier<'_, '_, R, W> { // So we just try and fallback if needed. // If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead // fall back to the generic copy loop. - if input_meta.potential_sendfile_source() { + if input_meta.potential_sendfile_source() && safe_kernel_copy(&input_meta, &output_meta) + { let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write); result.update_take(reader); @@ -197,7 +225,9 @@ impl SpecCopy for Copier<'_, '_, R, W> { } } - if input_meta.maybe_fifo() || output_meta.maybe_fifo() { + if (input_meta.maybe_fifo() || output_meta.maybe_fifo()) + && safe_kernel_copy(&input_meta, &output_meta) + { let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write); result.update_take(reader); @@ -298,13 +328,13 @@ impl CopyRead for &File { impl CopyWrite for File { fn properties(&self) -> CopyParams { - CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) + CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) } } impl CopyWrite for &File { fn properties(&self) -> CopyParams { - CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) + CopyParams(fd_to_meta(*self), Some(self.as_raw_fd())) } } @@ -401,13 +431,13 @@ impl CopyRead for StdinLock<'_> { impl CopyWrite for StdoutLock<'_> { fn properties(&self) -> CopyParams { - CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) + CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) } } impl CopyWrite for StderrLock<'_> { fn properties(&self) -> CopyParams { - CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) + CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) } }