From 9b9c24ec7f7c850d0c92babeb45a5ccff940a2c1 Mon Sep 17 00:00:00 2001 From: John Kugelman Date: Tue, 21 Sep 2021 21:14:32 -0400 Subject: [PATCH] Fix read_to_end to not grow an exact size buffer If you know how much data to expect and use `Vec::with_capacity` to pre-allocate a buffer of that capacity, `Read::read_to_end` will still double its capacity. It needs some space to perform a read, even though that read ends up returning `0`. It's a bummer to carefully pre-allocate 1GB to read a 1GB file into memory and end up using 2GB. This fixes that behavior by special casing a full buffer and reading into a small "probe" buffer instead. If that read returns `0` then it's confirmed that the buffer was the perfect size. If it doesn't, the probe buffer is appended to the normal buffer and the read loop continues. Fixing this allows several workarounds in the standard library to be removed: - `Take` no longer needs to override `Read::read_to_end`. - The `reservation_size` callback that allowed `Take` to inhibit the previous over-allocation behavior isn't needed. - `fs::read` doesn't need to reserve an extra byte in `initial_buffer_size`. Curiously, there was a unit test that specifically checked that `Read::read_to_end` *does* over-allocate. I removed that test, too. --- library/std/src/fs.rs | 7 +++-- library/std/src/io/mod.rs | 53 ++++++++++++++++++++++--------------- library/std/src/io/tests.rs | 18 +++---------- 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/library/std/src/fs.rs b/library/std/src/fs.rs index bdb172907ffed..bcc1f9a8b94dd 100644 --- a/library/std/src/fs.rs +++ b/library/std/src/fs.rs @@ -200,10 +200,9 @@ pub struct DirBuilder { /// Indicates how large a buffer to pre-allocate before reading the entire file. fn initial_buffer_size(file: &File) -> usize { - // Allocate one extra byte so the buffer doesn't need to grow before the - // final `read` call at the end of the file. Don't worry about `usize` - // overflow because reading will fail regardless in that case. - file.metadata().map(|m| m.len() as usize + 1).unwrap_or(0) + // Don't worry about `usize` overflow because reading will fail regardless + // in that case. + file.metadata().map(|m| m.len() as usize).unwrap_or(0) } /// Read the entire contents of a file into a bytes vector. diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 4a35d36a9def7..52b04680745c9 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -362,22 +362,18 @@ where // Because we're extending the buffer with uninitialized data for trusted // readers, we need to make sure to truncate that if any of this panics. fn read_to_end(r: &mut R, buf: &mut Vec) -> Result { - read_to_end_with_reservation(r, buf, |_| 32) -} - -fn read_to_end_with_reservation( - r: &mut R, - buf: &mut Vec, - mut reservation_size: F, -) -> Result -where - R: Read + ?Sized, - F: FnMut(&R) -> usize, -{ let start_len = buf.len(); + let start_cap = buf.capacity(); let mut g = Guard { len: buf.len(), buf }; loop { - if g.len == g.buf.len() { + // If we've read all the way up to the capacity, reserve more space. + if g.len == g.buf.capacity() { + g.buf.reserve(32); + } + + // Initialize any excess capacity and adjust the length so we can write + // to it. + if g.buf.len() < g.buf.capacity() { unsafe { // FIXME(danielhenrymantilla): #42788 // @@ -387,7 +383,6 @@ where // - Only the standard library gets to soundly "ignore" this, // based on its privileged knowledge of unstable rustc // internals; - g.buf.reserve(reservation_size(r)); let capacity = g.buf.capacity(); g.buf.set_len(capacity); r.initializer().initialize(&mut g.buf[g.len..]); @@ -404,9 +399,30 @@ where assert!(n <= buf.len()); g.len += n; } - Err(ref e) if e.kind() == ErrorKind::Interrupted => {} + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, Err(e) => return Err(e), } + + if g.len == g.buf.capacity() && g.buf.capacity() == start_cap { + // The buffer might be an exact fit. Let's read into a probe buffer + // and see if it returns `Ok(0)`. If so, we've avoided an + // unnecessary doubling of the capacity. But if not, append the + // probe buffer to the primary buffer and let its capacity grow. + let mut probe = [0u8; 32]; + + loop { + match r.read(&mut probe) { + Ok(0) => return Ok(g.len - start_len), + Ok(n) => { + g.buf.extend_from_slice(&probe[..n]); + g.len += n; + break; + } + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + } + } + } } } @@ -2583,13 +2599,6 @@ impl Read for Take { unsafe fn initializer(&self) -> Initializer { self.inner.initializer() } - - fn read_to_end(&mut self, buf: &mut Vec) -> Result { - // Pass in a reservation_size closure that respects the current value - // of limit for each read. If we hit the read limit, this prevents the - // final zero-byte read from allocating again. - read_to_end_with_reservation(self, buf, |self_| cmp::min(self_.limit, 32) as usize) - } } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index 1beb72a9a5072..58b2d1d14c927 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -362,24 +362,12 @@ impl<'a> Read for ExampleSliceReader<'a> { fn test_read_to_end_capacity() -> io::Result<()> { let input = &b"foo"[..]; - // read_to_end() generally needs to over-allocate, both for efficiency - // and so that it can distinguish EOF. Assert that this is the case - // with this simple ExampleSliceReader struct, which uses the default - // implementation of read_to_end. Even though vec1 is allocated with - // exactly enough capacity for the read, read_to_end will allocate more - // space here. + // read_to_end() takes care not to over-allocate when a buffer is the + // exact size needed. let mut vec1 = Vec::with_capacity(input.len()); ExampleSliceReader { slice: input }.read_to_end(&mut vec1)?; assert_eq!(vec1.len(), input.len()); - assert!(vec1.capacity() > input.len(), "allocated more"); - - // However, std::io::Take includes an implementation of read_to_end - // that will not allocate when the limit has already been reached. In - // this case, vec2 never grows. - let mut vec2 = Vec::with_capacity(input.len()); - ExampleSliceReader { slice: input }.take(input.len() as u64).read_to_end(&mut vec2)?; - assert_eq!(vec2.len(), input.len()); - assert_eq!(vec2.capacity(), input.len(), "did not allocate more"); + assert_eq!(vec1.capacity(), input.len(), "did not allocate more"); Ok(()) }