Skip to content

Commit 5c9c3c7

Browse files
committed
Auto merge of rust-lang#117925 - kornelski:read-to-oom, r=Amanieu
Handle out of memory errors in io:Read::read_to_end() rust-lang#116570 got stuck due to a [procedural confusion](rust-lang#116570 (comment)). Retrying so that it can get FCP with the proper team now. cc `@joshtriplett` `@BurntSushi` ---- I'd like to propose handling of out-of-memory errors in the default implementation of `io::Read::read_to_end()` and `fs::read()`. These methods create/grow a `Vec` with a size that is external to the program, and could be arbitrarily large. Due to being I/O methods, they can already fail in a variety of ways, in theory even including `ENOMEM` from the OS too, so another failure case should not surprise anyone. While this may not help much Linux with overcommit, it's useful for other platforms like WASM. [Internals thread](https://internals.rust-lang.org/t/io-read-read-to-end-should-handle-oom/19662). I've added documentation that makes it explicit that the OOM handling is a nice-to-have, and not a guarantee of the trait. I haven't changed the implementation of `impl Read for &[u8]` and `VecDeque` out of caution, because in these cases users could assume `read` can't fail. This code uses `try_reserve()` + `extend_from_slice()` which is optimized since rust-lang#117503.
2 parents 5518eaa + 60f4628 commit 5c9c3c7

File tree

4 files changed

+47
-7
lines changed

4 files changed

+47
-7
lines changed

library/std/src/fs.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,8 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
260260
fn inner(path: &Path) -> io::Result<Vec<u8>> {
261261
let mut file = File::open(path)?;
262262
let size = file.metadata().map(|m| m.len() as usize).ok();
263-
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
263+
let mut bytes = Vec::new();
264+
bytes.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
264265
io::default_read_to_end(&mut file, &mut bytes, size)?;
265266
Ok(bytes)
266267
}
@@ -302,7 +303,8 @@ pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
302303
fn inner(path: &Path) -> io::Result<String> {
303304
let mut file = File::open(path)?;
304305
let size = file.metadata().map(|m| m.len() as usize).ok();
305-
let mut string = String::with_capacity(size.unwrap_or(0));
306+
let mut string = String::new();
307+
string.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
306308
io::default_read_to_string(&mut file, &mut string, size)?;
307309
Ok(string)
308310
}
@@ -774,14 +776,14 @@ impl Read for &File {
774776
// Reserves space in the buffer based on the file size when available.
775777
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
776778
let size = buffer_capacity_required(self);
777-
buf.reserve(size.unwrap_or(0));
779+
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
778780
io::default_read_to_end(self, buf, size)
779781
}
780782

781783
// Reserves space in the buffer based on the file size when available.
782784
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
783785
let size = buffer_capacity_required(self);
784-
buf.reserve(size.unwrap_or(0));
786+
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
785787
io::default_read_to_string(self, buf, size)
786788
}
787789
}

library/std/src/io/buffered/bufreader.rs

+1
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ impl<R: ?Sized + Read> Read for BufReader<R> {
345345
// delegate to the inner implementation.
346346
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
347347
let inner_buf = self.buffer();
348+
buf.try_reserve(inner_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
348349
buf.extend_from_slice(inner_buf);
349350
let nread = inner_buf.len();
350351
self.discard_buffer();

library/std/src/io/impls.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,9 @@ impl Read for &[u8] {
303303

304304
#[inline]
305305
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
306-
buf.extend_from_slice(*self);
307306
let len = self.len();
307+
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;
308+
buf.extend_from_slice(*self);
308309
*self = &self[len..];
309310
Ok(len)
310311
}
@@ -451,7 +452,7 @@ impl<A: Allocator> Read for VecDeque<u8, A> {
451452
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
452453
// The total len is known upfront so we can reserve it in a single call.
453454
let len = self.len();
454-
buf.reserve(len);
455+
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;
455456

456457
let (front, back) = self.as_slices();
457458
buf.extend_from_slice(front);

library/std/src/io/mod.rs

+37-1
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,8 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
430430
loop {
431431
match r.read(&mut probe) {
432432
Ok(n) => {
433+
// there is no way to recover from allocation failure here
434+
// because the data has already been read.
433435
buf.extend_from_slice(&probe[..n]);
434436
return Ok(n);
435437
}
@@ -462,7 +464,8 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
462464
}
463465

464466
if buf.len() == buf.capacity() {
465-
buf.reserve(PROBE_SIZE); // buf is full, need more space
467+
// buf is full, need more space
468+
buf.try_reserve(PROBE_SIZE).map_err(|_| ErrorKind::OutOfMemory)?;
466469
}
467470

468471
let mut spare = buf.spare_capacity_mut();
@@ -815,6 +818,39 @@ pub trait Read {
815818
/// file.)
816819
///
817820
/// [`std::fs::read`]: crate::fs::read
821+
///
822+
/// ## Implementing `read_to_end`
823+
///
824+
/// When implementing the `io::Read` trait, it is recommended to allocate
825+
/// memory using [`Vec::try_reserve`]. However, this behavior is not guaranteed
826+
/// by all implementations, and `read_to_end` may not handle out-of-memory
827+
/// situations gracefully.
828+
///
829+
/// ```no_run
830+
/// # use std::io::{self, BufRead};
831+
/// # struct Example { example_datasource: io::Empty } impl Example {
832+
/// # fn get_some_data_for_the_example(&self) -> &'static [u8] { &[] }
833+
/// fn read_to_end(&mut self, dest_vec: &mut Vec<u8>) -> io::Result<usize> {
834+
/// let initial_vec_len = dest_vec.len();
835+
/// loop {
836+
/// let src_buf = self.example_datasource.fill_buf()?;
837+
/// if src_buf.is_empty() {
838+
/// break;
839+
/// }
840+
/// dest_vec.try_reserve(src_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
841+
/// dest_vec.extend_from_slice(src_buf);
842+
///
843+
/// // Any irreversible side effects should happen after `try_reserve` succeeds,
844+
/// // to avoid losing data on allocation error.
845+
/// let read = src_buf.len();
846+
/// self.example_datasource.consume(read);
847+
/// }
848+
/// Ok(dest_vec.len() - initial_vec_len)
849+
/// }
850+
/// # }
851+
/// ```
852+
///
853+
/// [`Vec::try_reserve`]: crate::vec::Vec::try_reserve
818854
#[stable(feature = "rust1", since = "1.0.0")]
819855
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
820856
default_read_to_end(self, buf, None)

0 commit comments

Comments
 (0)