diff --git a/library/core/src/io/borrowed_buf.rs b/library/core/src/io/borrowed_buf.rs index b765b96fd00a5..088dea7812945 100644 --- a/library/core/src/io/borrowed_buf.rs +++ b/library/core/src/io/borrowed_buf.rs @@ -2,26 +2,27 @@ use crate::fmt::{self, Debug, Formatter}; use crate::mem::{self, MaybeUninit}; +use crate::{cmp, ptr}; -/// A borrowed buffer of initially uninitialized bytes, which is incrementally filled. +/// A borrowed byte buffer which is incrementally filled and initialized. /// -/// This type makes it safer to work with `MaybeUninit` buffers, such as to read into a buffer -/// without having to initialize it first. It tracks the region of bytes that have been filled and -/// the region that remains uninitialized. +/// This type is a sort of "double cursor". It tracks three regions in the buffer: a region at the beginning of the +/// buffer that has been logically filled with data, a region that has been initialized at some point but not yet +/// logically filled, and a region at the end that is fully uninitialized. The filled region is guaranteed to be a +/// subset of the initialized region. /// -/// The contents of the buffer can be visualized as: +/// In summary, the contents of the buffer can be visualized as: /// ```not_rust -/// [ capacity ] -/// [ len: filled and initialized | capacity - len: uninitialized ] +/// [ capacity ] +/// [ filled | unfilled ] +/// [ initialized | uninitialized ] /// ``` /// -/// Note that `BorrowedBuf` does not distinguish between uninitialized data and data that was -/// previously initialized but no longer contains valid data. -/// -/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique -/// reference (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_len`), but -/// cannot be directly written. To write into the buffer, use `unfilled` to create a -/// `BorrowedCursor`. The cursor has write-only access to the unfilled portion of the buffer. +/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique reference +/// (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_init`), but cannot be +/// directly written. To write into the buffer, use `unfilled` to create a `BorrowedCursor`. The cursor +/// has write-only access to the unfilled portion of the buffer (you can think of it as a +/// write-only iterator). /// /// The lifetime `'data` is a bound on the lifetime of the underlying data. pub struct BorrowedBuf<'data> { @@ -29,11 +30,14 @@ pub struct BorrowedBuf<'data> { buf: &'data mut [MaybeUninit], /// The length of `self.buf` which is known to be filled. filled: usize, + /// The length of `self.buf` which is known to be initialized. + init: usize, } impl Debug for BorrowedBuf<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("BorrowedBuf") + .field("init", &self.init) .field("filled", &self.filled) .field("capacity", &self.capacity()) .finish() @@ -44,22 +48,24 @@ impl Debug for BorrowedBuf<'_> { impl<'data> From<&'data mut [u8]> for BorrowedBuf<'data> { #[inline] fn from(slice: &'data mut [u8]) -> BorrowedBuf<'data> { + let len = slice.len(); + BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized, even though it's - // already initialized. + // SAFETY: initialized data never becoming uninitialized is an invariant of BorrowedBuf buf: unsafe { (slice as *mut [u8]).as_uninit_slice_mut().unwrap() }, filled: 0, + init: len, } } } /// Creates a new `BorrowedBuf` from an uninitialized buffer. /// -/// Use `set_filled` if part of the buffer is known to be already filled. +/// Use `set_init` if part of the buffer is known to be already initialized. impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { #[inline] fn from(buf: &'data mut [MaybeUninit]) -> BorrowedBuf<'data> { - BorrowedBuf { buf, filled: 0 } + BorrowedBuf { buf, filled: 0, init: 0 } } } @@ -68,11 +74,14 @@ impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { /// Use `BorrowedCursor::with_unfilled_buf` instead for a safer alternative. impl<'data> From> for BorrowedBuf<'data> { #[inline] - fn from(buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + fn from(mut buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + let init = buf.init_mut().len(); BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized. + // SAFETY: no initialized byte is ever uninitialized as per + // `BorrowedBuf`'s invariant buf: unsafe { buf.buf.buf.get_unchecked_mut(buf.buf.filled..) }, filled: 0, + init, } } } @@ -90,6 +99,12 @@ impl<'data> BorrowedBuf<'data> { self.filled } + /// Returns the length of the initialized part of the buffer. + #[inline] + pub fn init_len(&self) -> usize { + self.init + } + /// Returns a shared reference to the filled portion of the buffer. #[inline] pub fn filled(&self) -> &[u8] { @@ -144,16 +159,33 @@ impl<'data> BorrowedBuf<'data> { /// Clears the buffer, resetting the filled region to empty. /// - /// The contents of the buffer are not modified. + /// The number of initialized bytes is not changed, and the contents of the buffer are not modified. #[inline] pub fn clear(&mut self) -> &mut Self { self.filled = 0; self } + + /// Asserts that the first `n` bytes of the buffer are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when called with fewer + /// bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` unfilled bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.init = cmp::max(self.init, n); + self + } } /// A writeable view of the unfilled portion of a [`BorrowedBuf`]. /// +/// The unfilled portion consists of an initialized and an uninitialized part; see [`BorrowedBuf`] +/// for details. +/// /// Data can be written directly to the cursor by using [`append`](BorrowedCursor::append) or /// indirectly by getting a slice of part or all of the cursor and writing into the slice. In the /// indirect case, the caller must call [`advance`](BorrowedCursor::advance) after writing to inform @@ -206,17 +238,48 @@ impl<'a> BorrowedCursor<'a> { self.buf.filled } + /// Returns a mutable reference to the initialized portion of the cursor. + #[inline] + pub fn init_mut(&mut self) -> &mut [u8] { + // SAFETY: We only slice the initialized part of the buffer, which is always valid + unsafe { + let buf = self.buf.buf.get_unchecked_mut(self.buf.filled..self.buf.init); + buf.assume_init_mut() + } + } + /// Returns a mutable reference to the whole cursor. /// /// # Safety /// - /// The caller must not uninitialize any previously initialized bytes. + /// The caller must not uninitialize any bytes in the initialized portion of the cursor. #[inline] pub unsafe fn as_mut(&mut self) -> &mut [MaybeUninit] { // SAFETY: always in bounds unsafe { self.buf.buf.get_unchecked_mut(self.buf.filled..) } } + /// Advances the cursor by asserting that `n` bytes have been filled. + /// + /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be + /// accessed via the underlying buffer. I.e., the buffer's filled portion grows by `n` elements + /// and its unfilled portion (and the capacity of this cursor) shrinks by `n` elements. + /// + /// If less than `n` bytes initialized (by the cursor's point of view), `set_init` should be + /// called first. + /// + /// # Panics + /// + /// Panics if there are less than `n` bytes initialized. + #[inline] + pub fn advance(&mut self, n: usize) -> &mut Self { + // The subtraction cannot underflow by invariant of this type. + assert!(n <= self.buf.init - self.buf.filled); + + self.buf.filled += n; + self + } + /// Advances the cursor by asserting that `n` bytes have been filled. /// /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be @@ -225,11 +288,42 @@ impl<'a> BorrowedCursor<'a> { /// /// # Safety /// - /// The caller must ensure that the first `n` bytes of the cursor have been initialized. `n` - /// must not exceed the remaining capacity of this cursor. + /// The caller must ensure that the first `n` bytes of the cursor have been properly + /// initialised. #[inline] - pub unsafe fn advance(&mut self, n: usize) -> &mut Self { + pub unsafe fn advance_unchecked(&mut self, n: usize) -> &mut Self { self.buf.filled += n; + self.buf.init = cmp::max(self.buf.init, self.buf.filled); + self + } + + /// Initializes all bytes in the cursor. + #[inline] + pub fn ensure_init(&mut self) -> &mut Self { + // SAFETY: always in bounds and we never uninitialize these bytes. + let uninit = unsafe { self.buf.buf.get_unchecked_mut(self.buf.init..) }; + + // SAFETY: 0 is a valid value for MaybeUninit and the length matches the allocation + // since it is comes from a slice reference. + unsafe { + ptr::write_bytes(uninit.as_mut_ptr(), 0, uninit.len()); + } + self.buf.init = self.buf.capacity(); + + self + } + + /// Asserts that the first `n` unfilled bytes of the cursor are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when + /// called with fewer bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.buf.init = cmp::max(self.buf.init, self.buf.filled + n); self } @@ -247,6 +341,10 @@ impl<'a> BorrowedCursor<'a> { self.as_mut()[..buf.len()].write_copy_of_slice(buf); } + // SAFETY: We just added the entire contents of buf to the filled section. + unsafe { + self.set_init(buf.len()); + } self.buf.filled += buf.len(); } @@ -269,9 +367,17 @@ impl<'a> BorrowedCursor<'a> { // there, one could mark some bytes as initialized even though there aren't. assert!(core::ptr::addr_eq(prev_ptr, buf.buf)); - // SAFETY: These bytes were filled in the `BorrowedBuf`, so they're filled in the cursor - // too, because the buffer wasn't replaced. - self.buf.filled += buf.filled; + let filled = buf.filled; + let init = buf.init; + + // Update `init` and `filled` fields with what was written to the buffer. + // `self.buf.filled` was the starting length of the `BorrowedBuf`. + // + // SAFETY: These amounts of bytes were initialized/filled in the `BorrowedBuf`, + // and therefore they are initialized/filled in the cursor too, because the + // buffer wasn't replaced. + self.buf.init = self.buf.filled + init; + self.buf.filled += filled; res } diff --git a/library/coretests/tests/io/borrowed_buf.rs b/library/coretests/tests/io/borrowed_buf.rs index 730ba04465a11..aaa98d26ff8b9 100644 --- a/library/coretests/tests/io/borrowed_buf.rs +++ b/library/coretests/tests/io/borrowed_buf.rs @@ -8,6 +8,7 @@ fn new() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } @@ -19,16 +20,27 @@ fn uninit() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 0); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } +#[test] +fn initialize_unfilled() { + let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + rbuf.unfilled().ensure_init(); + + assert_eq!(rbuf.init_len(), 16); +} + #[test] fn advance_filled() { let buf: &mut [_] = &mut [0; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(1) }; + rbuf.unfilled().advance(1); assert_eq!(rbuf.filled().len(), 1); assert_eq!(rbuf.unfilled().capacity(), 15); @@ -39,7 +51,7 @@ fn clear() { let buf: &mut [_] = &mut [255; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(16) }; + rbuf.unfilled().advance(16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.unfilled().capacity(), 0); @@ -49,9 +61,33 @@ fn clear() { assert_eq!(rbuf.filled().len(), 0); assert_eq!(rbuf.unfilled().capacity(), 16); - unsafe { rbuf.unfilled().advance(16) }; + assert_eq!(rbuf.unfilled().init_mut(), [255; 16]); +} + +#[test] +fn set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + + rbuf.unfilled().advance(4); - assert_eq!(rbuf.filled(), [255; 16]); + unsafe { + rbuf.set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); } #[test] @@ -61,6 +97,7 @@ fn append() { rbuf.unfilled().append(&[0; 8]); + assert_eq!(rbuf.init_len(), 8); assert_eq!(rbuf.filled().len(), 8); assert_eq!(rbuf.filled(), [0; 8]); @@ -68,6 +105,7 @@ fn append() { rbuf.unfilled().append(&[1; 16]); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.filled(), [1; 16]); } @@ -87,12 +125,43 @@ fn reborrow_written() { assert_eq!(cursor.written(), 32); assert_eq!(buf.unfilled().written(), 32); + assert_eq!(buf.init_len(), 32); assert_eq!(buf.filled().len(), 32); let filled = buf.filled(); assert_eq!(&filled[..16], [1; 16]); assert_eq!(&filled[16..], [2; 16]); } +#[test] +fn cursor_set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 16); + + rbuf.unfilled().advance(4); + + unsafe { + rbuf.unfilled().set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 12); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 12); +} + #[test] fn cursor_with_unfilled_buf() { let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; @@ -100,30 +169,31 @@ fn cursor_with_unfilled_buf() { let mut cursor = rbuf.unfilled(); cursor.with_unfilled_buf(|buf| { - assert_eq!(buf.capacity(), 16); buf.unfilled().append(&[1, 2, 3]); assert_eq!(buf.filled(), &[1, 2, 3]); }); + assert_eq!(cursor.init_mut().len(), 0); assert_eq!(cursor.written(), 3); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 13); + assert_eq!(buf.init_len(), 0); - unsafe { - buf.unfilled().as_mut().write_filled(0); - buf.unfilled().advance(4) - }; + buf.unfilled().ensure_init(); + buf.unfilled().advance(4); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 9); + assert_eq!(buf.init_len(), 9); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); - assert_eq!(rbuf.len(), 7); assert_eq!(rbuf.filled(), &[1, 2, 3, 0, 0, 0, 0]); } diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs index bcaafcfee787a..0a5d1153d860c 100644 --- a/library/std/src/fs/tests.rs +++ b/library/std/src/fs/tests.rs @@ -709,6 +709,8 @@ fn file_test_read_buf() { let mut file = check!(File::open(filename)); check!(file.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // File::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); check!(fs::remove_file(filename)); } diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 69c260b5410af..40441dc057d0d 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -284,6 +284,15 @@ impl BufReader { } } +// This is only used by a test which asserts that the initialization-tracking is correct. +#[cfg(test)] +impl BufReader { + #[allow(missing_docs)] + pub fn initialized(&self) -> usize { + self.buf.initialized() + } +} + impl BufReader { /// Seeks relative to the current position. If the new position lies within the buffer, /// the buffer will not be flushed, allowing for more efficient seeks. diff --git a/library/std/src/io/buffered/bufreader/buffer.rs b/library/std/src/io/buffered/bufreader/buffer.rs index 2694726b3f44f..9b600cd55758b 100644 --- a/library/std/src/io/buffered/bufreader/buffer.rs +++ b/library/std/src/io/buffered/bufreader/buffer.rs @@ -21,19 +21,25 @@ pub struct Buffer { // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are // initialized with bytes from a read. filled: usize, + // This is the max number of bytes returned across all `fill_buf` calls. We track this so that we + // can accurately tell `read_buf` how many bytes of buf are initialized, to bypass as much of its + // defensive initialization as possible. Note that while this often the same as `filled`, it + // doesn't need to be. Calls to `fill_buf` are not required to actually fill the buffer, and + // omitting this is a huge perf regression for `Read` impls that do not. + initialized: usize, } impl Buffer { #[inline] pub fn with_capacity(capacity: usize) -> Self { let buf = Box::new_uninit_slice(capacity); - Self { buf, pos: 0, filled: 0 } + Self { buf, pos: 0, filled: 0, initialized: 0 } } #[inline] pub fn try_with_capacity(capacity: usize) -> io::Result { match Box::try_new_uninit_slice(capacity) { - Ok(buf) => Ok(Self { buf, pos: 0, filled: 0 }), + Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), Err(_) => { Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer")) } @@ -62,6 +68,12 @@ impl Buffer { self.pos } + // This is only used by a test which asserts that the initialization-tracking is correct. + #[cfg(test)] + pub fn initialized(&self) -> usize { + self.initialized + } + #[inline] pub fn discard_buffer(&mut self) { self.pos = 0; @@ -98,8 +110,13 @@ impl Buffer { /// Read more bytes into the buffer without discarding any of its contents pub fn read_more(&mut self, mut reader: impl Read) -> io::Result { let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); + let old_init = self.initialized - self.filled; + unsafe { + buf.set_init(old_init); + } reader.read_buf(buf.unfilled())?; self.filled += buf.len(); + self.initialized += buf.init_len() - old_init; Ok(buf.len()) } @@ -120,10 +137,16 @@ impl Buffer { debug_assert!(self.pos == self.filled); let mut buf = BorrowedBuf::from(&mut *self.buf); + // SAFETY: `self.filled` bytes will always have been initialized. + unsafe { + buf.set_init(self.initialized); + } + let result = reader.read_buf(buf.unfilled()); self.pos = 0; self.filled = buf.len(); + self.initialized = buf.init_len(); result?; } diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index fc77b02a8e828..6ad4158b92904 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ -1052,6 +1052,30 @@ fn single_formatted_write() { assert_eq!(writer.get_ref().events, [RecordedEvent::Write("hello, world!\n".to_string())]); } +#[test] +fn bufreader_full_initialize() { + struct OneByteReader; + impl Read for OneByteReader { + fn read(&mut self, buf: &mut [u8]) -> crate::io::Result { + if buf.len() > 0 { + buf[0] = 0; + Ok(1) + } else { + Ok(0) + } + } + } + let mut reader = BufReader::new(OneByteReader); + // Nothing is initialized yet. + assert_eq!(reader.initialized(), 0); + + let buf = reader.fill_buf().unwrap(); + // We read one byte... + assert_eq!(buf.len(), 1); + // But we initialized the whole buffer! + assert_eq!(reader.initialized(), reader.capacity()); +} + /// This is a regression test for https://github.com/rust-lang/rust/issues/127584. #[test] fn bufwriter_aliasing() { diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index 8b5e7c4df4e08..2b558efb8885e 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -214,19 +214,28 @@ impl BufferedWriterSpec for BufWriter { } let mut len = 0; + let mut init = 0; loop { let buf = self.buffer_mut(); let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into(); + unsafe { + // SAFETY: init is either 0 or the init_len from the previous iteration. + read_buf.set_init(init); + } + if read_buf.capacity() >= DEFAULT_BUF_SIZE { let mut cursor = read_buf.unfilled(); match reader.read_buf(cursor.reborrow()) { Ok(()) => { let bytes_read = cursor.written(); + if bytes_read == 0 { return Ok(len); } + + init = read_buf.init_len() - bytes_read; len += bytes_read as u64; // SAFETY: BorrowedBuf guarantees all of its filled bytes are init @@ -239,6 +248,10 @@ impl BufferedWriterSpec for BufWriter { Err(e) => return Err(e), } } else { + // All the bytes that were already in the buffer are initialized, + // treat them as such when the buffer is flushed. + init += buf.len(); + self.flush_buf()?; } } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 4c064c435e5bc..b7756befa11e9 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -419,6 +419,8 @@ pub(crate) fn default_read_to_end( .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)) .unwrap_or(DEFAULT_BUF_SIZE); + let mut initialized = 0; // Extra initialized bytes from previous loop iteration + const PROBE_SIZE: usize = 32; fn small_probe_read(r: &mut R, buf: &mut Vec) -> Result { @@ -447,6 +449,8 @@ pub(crate) fn default_read_to_end( } } + let mut consecutive_short_reads = 0; + loop { if buf.len() == buf.capacity() && buf.capacity() == start_cap { // The buffer might be an exact fit. Let's read into a probe buffer @@ -470,6 +474,11 @@ pub(crate) fn default_read_to_end( spare = &mut spare[..buf_len]; let mut read_buf: BorrowedBuf<'_> = spare.into(); + // SAFETY: These bytes were initialized but not filled in the previous loop + unsafe { + read_buf.set_init(initialized); + } + let mut cursor = read_buf.unfilled(); let result = loop { match r.read_buf(cursor.reborrow()) { @@ -480,7 +489,9 @@ pub(crate) fn default_read_to_end( } }; + let unfilled_but_initialized = cursor.init_mut().len(); let bytes_read = cursor.written(); + let was_fully_initialized = read_buf.init_len() == buf_len; // SAFETY: BorrowedBuf's invariants mean this much memory is initialized. unsafe { @@ -495,8 +506,27 @@ pub(crate) fn default_read_to_end( return Ok(buf.len() - start_len); } + if bytes_read < buf_len { + consecutive_short_reads += 1; + } else { + consecutive_short_reads = 0; + } + + // store how much was initialized but not filled + initialized = unfilled_but_initialized; + // Use heuristics to determine the max read size if no initial size hint was provided if size_hint.is_none() { + // The reader is returning short reads but it doesn't call ensure_init(). + // In that case we no longer need to restrict read sizes to avoid + // initialization costs. + // When reading from disk we usually don't get any short reads except at EOF. + // So we wait for at least 2 short reads before uncapping the read buffer; + // this helps with the Windows issue. + if !was_fully_initialized && consecutive_short_reads > 1 { + max_read_size = usize::MAX; + } + // we have passed a larger buffer than previously and the // reader still hasn't returned a short read if buf_len >= max_read_size && bytes_read == buf_len { @@ -557,13 +587,8 @@ pub(crate) fn default_read_buf(read: F, mut cursor: BorrowedCursor<'_>) -> Re where F: FnOnce(&mut [u8]) -> Result, { - // SAFETY: We do not uninitialize any part of the buffer. - let n = read(unsafe { cursor.as_mut().write_filled(0) })?; - assert!(n <= cursor.capacity()); - // SAFETY: We've initialized the entire buffer, and `read` can't make it uninitialized. - unsafe { - cursor.advance(n); - } + let n = read(cursor.ensure_init().init_mut())?; + cursor.advance(n); Ok(()) } @@ -3073,21 +3098,31 @@ impl Read for Take { // The condition above guarantees that `self.limit` fits in `usize`. let limit = self.limit as usize; + let extra_init = cmp::min(limit, buf.init_mut().len()); + // SAFETY: no uninit data is written to ibuf let ibuf = unsafe { &mut buf.as_mut()[..limit] }; let mut sliced_buf: BorrowedBuf<'_> = ibuf.into(); + // SAFETY: extra_init bytes of ibuf are known to be initialized + unsafe { + sliced_buf.set_init(extra_init); + } + let mut cursor = sliced_buf.unfilled(); let result = self.inner.read_buf(cursor.reborrow()); + let new_init = cursor.init_mut().len(); let filled = sliced_buf.len(); // cursor / sliced_buf / ibuf must drop here - // SAFETY: filled bytes have been filled and therefore initialized unsafe { - buf.advance(filled); + // SAFETY: filled bytes have been filled and therefore initialized + buf.advance_unchecked(filled); + // SAFETY: new_init bytes of buf's unfilled buffer have been initialized + buf.set_init(new_init); } self.limit -= filled as u64; diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index e14e6432eafaf..b22988d4a8a9d 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -209,6 +209,15 @@ fn read_buf_exact() { assert_eq!(c.read_buf_exact(buf.unfilled()).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); } +#[test] +#[should_panic] +fn borrowed_cursor_advance_overflow() { + let mut buf = [0; 512]; + let mut buf = BorrowedBuf::from(&mut buf[..]); + buf.unfilled().advance(1); + buf.unfilled().advance(usize::MAX); +} + #[test] fn take_eof() { struct R; diff --git a/library/std/src/io/util.rs b/library/std/src/io/util.rs index a09c8bc069306..0410df3ef1a3e 100644 --- a/library/std/src/io/util.rs +++ b/library/std/src/io/util.rs @@ -283,7 +283,7 @@ impl Read for Repeat { // SAFETY: No uninit bytes are being written. unsafe { buf.as_mut() }.write_filled(self.byte); // SAFETY: the entire unfilled portion of buf has been initialized. - unsafe { buf.advance(buf.capacity()) }; + unsafe { buf.advance_unchecked(buf.capacity()) }; Ok(()) } diff --git a/library/std/src/io/util/tests.rs b/library/std/src/io/util/tests.rs index 92dbc3919bea2..d0f106d7af416 100644 --- a/library/std/src/io/util/tests.rs +++ b/library/std/src/io/util/tests.rs @@ -75,36 +75,43 @@ fn empty_reads() { let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); Read::by_ref(&mut e).read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [MaybeUninit<_>] = &mut []; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf_exact(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); @@ -113,6 +120,7 @@ fn empty_reads() { ErrorKind::UnexpectedEof, ); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let mut buf = Vec::new(); assert_eq!(e.read_to_end(&mut buf).unwrap(), 0); diff --git a/library/std/src/net/tcp/tests.rs b/library/std/src/net/tcp/tests.rs index 4787f8a1040b9..7c7ef7b2f7018 100644 --- a/library/std/src/net/tcp/tests.rs +++ b/library/std/src/net/tcp/tests.rs @@ -315,6 +315,8 @@ fn read_buf() { let mut buf = BorrowedBuf::from(buf.as_mut_slice()); t!(s.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // TcpStream::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); t.join().ok().expect("thread panicked"); }) diff --git a/library/std/src/process/tests.rs b/library/std/src/process/tests.rs index c8a83edffe427..12c5130defe5a 100644 --- a/library/std/src/process/tests.rs +++ b/library/std/src/process/tests.rs @@ -188,8 +188,10 @@ fn child_stdout_read_buf() { // ChildStdout::read_buf should omit buffer initialization. if cfg!(target_os = "windows") { assert_eq!(buf.filled(), b"abc\r\n"); + assert_eq!(buf.init_len(), 5); } else { assert_eq!(buf.filled(), b"abc\n"); + assert_eq!(buf.init_len(), 4); }; } diff --git a/library/std/src/sys/fd/hermit.rs b/library/std/src/sys/fd/hermit.rs index 28fafdaf57d8a..2666da16420c4 100644 --- a/library/std/src/sys/fd/hermit.rs +++ b/library/std/src/sys/fd/hermit.rs @@ -33,7 +33,7 @@ impl FileDesc { ) })?; // SAFETY: Exactly `result` bytes have been filled. - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } diff --git a/library/std/src/sys/fd/unix.rs b/library/std/src/sys/fd/unix.rs index c5e8646dada1e..bb6c0ac9e18e6 100644 --- a/library/std/src/sys/fd/unix.rs +++ b/library/std/src/sys/fd/unix.rs @@ -185,7 +185,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } @@ -203,7 +203,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/fs/solid.rs b/library/std/src/sys/fs/solid.rs index ec1db262855ad..f6d5d3b784d3b 100644 --- a/library/std/src/sys/fs/solid.rs +++ b/library/std/src/sys/fs/solid.rs @@ -401,7 +401,7 @@ impl File { // Safety: `num_bytes_read` bytes were written to the unfilled // portion of the buffer - cursor.advance(num_bytes_read); + cursor.advance_unchecked(num_bytes_read); Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/hermit.rs b/library/std/src/sys/net/connection/socket/hermit.rs index 8350d2b5fe4a0..c32f8dcc8de86 100644 --- a/library/std/src/sys/net/connection/socket/hermit.rs +++ b/library/std/src/sys/net/connection/socket/hermit.rs @@ -143,7 +143,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/solid.rs b/library/std/src/sys/net/connection/socket/solid.rs index ac06cdc00c8f0..673d75046d3f2 100644 --- a/library/std/src/sys/net/connection/socket/solid.rs +++ b/library/std/src/sys/net/connection/socket/solid.rs @@ -190,7 +190,7 @@ impl Socket { netc::recv(self.as_raw_fd(), buf.as_mut().as_mut_ptr().cast(), buf.capacity(), flags) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/unix.rs b/library/std/src/sys/net/connection/socket/unix.rs index 323d6214347e7..6d06a8d86bf16 100644 --- a/library/std/src/sys/net/connection/socket/unix.rs +++ b/library/std/src/sys/net/connection/socket/unix.rs @@ -294,7 +294,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/windows.rs b/library/std/src/sys/net/connection/socket/windows.rs index 4da51d78ea69b..7355f0ce6bf5e 100644 --- a/library/std/src/sys/net/connection/socket/windows.rs +++ b/library/std/src/sys/net/connection/socket/windows.rs @@ -243,7 +243,7 @@ impl Socket { } } _ => { - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } } diff --git a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs index f1e4a5a42577a..5041770faf661 100644 --- a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs @@ -46,7 +46,7 @@ pub fn read_buf(fd: Fd, mut buf: BorrowedCursor<'_>) -> IoResult<()> { let mut userbuf = alloc::User::<[u8]>::uninitialized(buf.capacity()); let len = raw::read(fd, userbuf.as_mut_ptr().cast(), userbuf.len()).from_sgx_result()?; userbuf[..len].copy_to_enclave(&mut buf.as_mut()[..len]); - buf.advance(len); + buf.advance_unchecked(len); Ok(()) } } diff --git a/library/std/src/sys/pal/windows/handle.rs b/library/std/src/sys/pal/windows/handle.rs index ffa8507831acf..90e243e1aa038 100644 --- a/library/std/src/sys/pal/windows/handle.rs +++ b/library/std/src/sys/pal/windows/handle.rs @@ -117,7 +117,7 @@ impl Handle { Ok(read) => { // Safety: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } @@ -140,7 +140,7 @@ impl Handle { // SAFETY: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } diff --git a/library/std/src/sys/process/windows/child_pipe.rs b/library/std/src/sys/process/windows/child_pipe.rs index b848435ac275f..da7a86ca072e3 100644 --- a/library/std/src/sys/process/windows/child_pipe.rs +++ b/library/std/src/sys/process/windows/child_pipe.rs @@ -260,7 +260,7 @@ impl ChildPipe { Err(e) => Err(e), Ok(n) => { unsafe { - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) } diff --git a/library/std/src/sys/stdio/zkvm.rs b/library/std/src/sys/stdio/zkvm.rs index 84496ac937363..f31c6c26e87cd 100644 --- a/library/std/src/sys/stdio/zkvm.rs +++ b/library/std/src/sys/stdio/zkvm.rs @@ -19,7 +19,7 @@ impl io::Read for Stdin { fn read_buf(&mut self, mut buf: BorrowedCursor<'_>) -> io::Result<()> { unsafe { let n = abi::sys_read(fileno::STDIN, buf.as_mut().as_mut_ptr().cast(), buf.capacity()); - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) }