Skip to content

Commit a29dada

Browse files
committedApr 13, 2023
Auto merge of #108283 - the8472:remove-splice-into-pipe, r=ChrisDenton
don't splice from files into pipes in io::copy This fixes potential data ordering issue where a write performed after a copy operation could become visible in the copy even though it signaled completion. I assumed that by not setting `SPLICE_F_MOVE` we would be safe and the kernel would do a copy in kernel space and we could avoid the read-write syscall and copy-to/from-userspace costs. But apparently that flag only makes a difference when splicing from a pipe, but not when splicing into it. Context: https://lkml.org/lkml/2023/2/9/673
2 parents d8fc819 + 171ccb5 commit a29dada

File tree

2 files changed

+88
-16
lines changed

2 files changed

+88
-16
lines changed
 

‎library/std/src/sys/unix/kernel_copy.rs

+46-16
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@
1717
//! Once it has obtained all necessary pieces and brought any wrapper types into a state where they
1818
//! can be safely bypassed it will attempt to use the `copy_file_range(2)`,
1919
//! `sendfile(2)` or `splice(2)` syscalls to move data directly between file descriptors.
20-
//! Since those syscalls have requirements that cannot be fully checked in advance and
21-
//! gathering additional information about file descriptors would require additional syscalls
22-
//! anyway it simply attempts to use them one after another (guided by inaccurate hints) to
23-
//! figure out which one works and falls back to the generic read-write copy loop if none of them
24-
//! does.
20+
//! Since those syscalls have requirements that cannot be fully checked in advance it attempts
21+
//! to use them one after another (guided by hints) to figure out which one works and
22+
//! falls back to the generic read-write copy loop if none of them does.
2523
//! Once a working syscall is found for a pair of file descriptors it will be called in a loop
2624
//! until the copy operation is completed.
2725
//!
@@ -84,14 +82,10 @@ pub(crate) fn copy_spec<R: Read + ?Sized, W: Write + ?Sized>(
8482
/// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred
8583
/// type may be wrong.
8684
enum FdMeta {
87-
/// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata
88-
/// because it is cheaper than probing all possible syscalls (reader side)
8985
Metadata(Metadata),
9086
Socket,
9187
Pipe,
92-
/// We don't have any metadata, e.g. because the original type was `File` which can represent
93-
/// any `FileType` and we did not query the metadata either since it did not seem beneficial
94-
/// (writer side)
88+
/// We don't have any metadata because the stat syscall failed
9589
NoneObtained,
9690
}
9791

@@ -131,6 +125,39 @@ impl FdMeta {
131125
}
132126
}
133127

128+
/// Returns true either if changes made to the source after a sendfile/splice call won't become
129+
/// visible in the sink or the source has explicitly opted into such behavior (e.g. by splicing
130+
/// a file into a pipe, the pipe being the source in this case).
131+
///
132+
/// This will prevent File -> Pipe and File -> Socket splicing/sendfile optimizations to uphold
133+
/// the Read/Write API semantics of io::copy.
134+
///
135+
/// Note: This is not 100% airtight, the caller can use the RawFd conversion methods to turn a
136+
/// regular file into a TcpSocket which will be treated as a socket here without checking.
137+
fn safe_kernel_copy(source: &FdMeta, sink: &FdMeta) -> bool {
138+
match (source, sink) {
139+
// Data arriving from a socket is safe because the sender can't modify the socket buffer.
140+
// Data arriving from a pipe is safe(-ish) because either the sender *copied*
141+
// the bytes into the pipe OR explicitly performed an operation that enables zero-copy,
142+
// thus promising not to modify the data later.
143+
(FdMeta::Socket, _) => true,
144+
(FdMeta::Pipe, _) => true,
145+
(FdMeta::Metadata(meta), _)
146+
if meta.file_type().is_fifo() || meta.file_type().is_socket() =>
147+
{
148+
true
149+
}
150+
// Data going into non-pipes/non-sockets is safe because the "later changes may become visible" issue
151+
// only happens for pages sitting in send buffers or pipes.
152+
(_, FdMeta::Metadata(meta))
153+
if !meta.file_type().is_fifo() && !meta.file_type().is_socket() =>
154+
{
155+
true
156+
}
157+
_ => false,
158+
}
159+
}
160+
134161
struct CopyParams(FdMeta, Option<RawFd>);
135162

136163
struct Copier<'a, 'b, R: Read + ?Sized, W: Write + ?Sized> {
@@ -186,7 +213,8 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
186213
// So we just try and fallback if needed.
187214
// If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead
188215
// fall back to the generic copy loop.
189-
if input_meta.potential_sendfile_source() {
216+
if input_meta.potential_sendfile_source() && safe_kernel_copy(&input_meta, &output_meta)
217+
{
190218
let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write);
191219
result.update_take(reader);
192220

@@ -197,7 +225,9 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
197225
}
198226
}
199227

200-
if input_meta.maybe_fifo() || output_meta.maybe_fifo() {
228+
if (input_meta.maybe_fifo() || output_meta.maybe_fifo())
229+
&& safe_kernel_copy(&input_meta, &output_meta)
230+
{
201231
let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write);
202232
result.update_take(reader);
203233

@@ -298,13 +328,13 @@ impl CopyRead for &File {
298328

299329
impl CopyWrite for File {
300330
fn properties(&self) -> CopyParams {
301-
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
331+
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
302332
}
303333
}
304334

305335
impl CopyWrite for &File {
306336
fn properties(&self) -> CopyParams {
307-
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
337+
CopyParams(fd_to_meta(*self), Some(self.as_raw_fd()))
308338
}
309339
}
310340

@@ -401,13 +431,13 @@ impl CopyRead for StdinLock<'_> {
401431

402432
impl CopyWrite for StdoutLock<'_> {
403433
fn properties(&self) -> CopyParams {
404-
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
434+
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
405435
}
406436
}
407437

408438
impl CopyWrite for StderrLock<'_> {
409439
fn properties(&self) -> CopyParams {
410-
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
440+
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
411441
}
412442
}
413443

‎library/std/src/sys/unix/kernel_copy/tests.rs

+42
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,48 @@ fn copies_append_mode_sink() -> Result<()> {
8383
Ok(())
8484
}
8585

86+
#[test]
87+
fn dont_splice_pipes_from_files() -> Result<()> {
88+
// splicing to a pipe and then modifying the source could lead to changes
89+
// becoming visible in an unexpected order.
90+
91+
use crate::io::SeekFrom;
92+
use crate::os::unix::fs::FileExt;
93+
use crate::process::{ChildStdin, ChildStdout};
94+
use crate::sys_common::FromInner;
95+
96+
let (read_end, write_end) = crate::sys::pipe::anon_pipe()?;
97+
98+
let mut read_end = ChildStdout::from_inner(read_end);
99+
let mut write_end = ChildStdin::from_inner(write_end);
100+
101+
let tmp_path = tmpdir();
102+
let file = tmp_path.join("to_be_modified");
103+
let mut file =
104+
crate::fs::OpenOptions::new().create_new(true).read(true).write(true).open(file)?;
105+
106+
const SZ: usize = libc::PIPE_BUF as usize;
107+
108+
// put data in page cache
109+
let mut buf: [u8; SZ] = [0x01; SZ];
110+
file.write_all(&buf).unwrap();
111+
112+
// copy page into pipe
113+
file.seek(SeekFrom::Start(0)).unwrap();
114+
assert!(io::copy(&mut file, &mut write_end).unwrap() == SZ as u64);
115+
116+
// modify file
117+
buf[0] = 0x02;
118+
file.write_at(&buf, 0).unwrap();
119+
120+
// read from pipe
121+
read_end.read_exact(buf.as_mut_slice()).unwrap();
122+
123+
assert_eq!(buf[0], 0x01, "data in pipe should reflect the original, not later modifications");
124+
125+
Ok(())
126+
}
127+
86128
#[bench]
87129
fn bench_file_to_file_copy(b: &mut test::Bencher) {
88130
const BYTES: usize = 128 * 1024;

0 commit comments

Comments
 (0)
Please sign in to comment.