Review notes (text before the first `diff --git` line is ignored by patch tools):
  * `read_to_end` uses `Read::size_snapshot` only to pre-size the buffer and
    still reads until `read` returns `Ok(0)`, so a source that grows after the
    snapshot is taken is never truncated.
  * The `index` lines carry the blob hashes of the patch as first posted; the
    post-image hashes for fs.rs and io/mod.rs are stale. Regenerate the patch
    with `git diff` if they matter.

diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs
index b07733d3c803c..89570fde91aa1 100644
--- a/src/libstd/fs.rs
+++ b/src/libstd/fs.rs
@@ -449,6 +449,25 @@ impl Read for File {
         self.inner.read(buf)
     }
 
+    fn size_snapshot(&self) -> Option<usize> {
+        // Ignore I/O errors; we're just querying the size and position of an
+        // already-open file in preparation for reading from it.
+        if let Ok(meta) = self.metadata() {
+            let len = meta.len();
+            if let Ok(position) = self.inner.seek(SeekFrom::Current(0)) {
+                if let Some(distance) = len.checked_sub(position) {
+                    let size = distance as usize;
+                    // Don't trust a length of zero. For example, "pseudofiles"
+                    // on Linux like /proc/meminfo report a size of 0.
+                    if size != 0 && size as u64 == distance {
+                        return Some(size);
+                    }
+                }
+            }
+        }
+        None
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         Initializer::nop()
@@ -473,6 +492,10 @@ impl<'a> Read for &'a File {
         self.inner.read(buf)
     }
 
+    fn size_snapshot(&self) -> Option<usize> {
+        (**self).size_snapshot()
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         Initializer::nop()
@@ -782,6 +805,10 @@ impl Metadata {
 
     /// Returns the size of the file, in bytes, this metadata is for.
     ///
+    /// Note that some special files report a size of `0` even though they
+    /// have contents (for example, "pseudofiles" on Linux such as
+    /// `/proc/meminfo`), so a size of `0` does not imply an empty file.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs
index 6d3fbc9d26822..9989bd01ab8c9 100644
--- a/src/libstd/io/buffered.rs
+++ b/src/libstd/io/buffered.rs
@@ -211,6 +211,15 @@ impl<R: Read> Read for BufReader<R> {
         Ok(nread)
     }
 
+    #[inline]
+    fn size_snapshot(&self) -> Option<usize> {
+        let buffered_len = self.cap - self.pos;
+        if let Some(size) = self.inner.size_snapshot() {
+            return buffered_len.checked_add(size);
+        }
+        None
+    }
+
     // we can't skip unconditionally because of the large buffer case in read.
     unsafe fn initializer(&self) -> Initializer {
         self.inner.initializer()
diff --git a/src/libstd/io/cursor.rs b/src/libstd/io/cursor.rs
index b5ea5531b65a7..b6192435e6c71 100644
--- a/src/libstd/io/cursor.rs
+++ b/src/libstd/io/cursor.rs
@@ -237,6 +237,16 @@ impl<T> Read for Cursor<T> where T: AsRef<[u8]> {
         Ok(())
     }
 
+    fn size_snapshot(&self) -> Option<usize> {
+        if let Some(diff) = (self.inner.as_ref().len() as u64).checked_sub(self.pos) {
+            let size = diff as usize;
+            if size as u64 == diff {
+                return Some(size);
+            }
+        }
+        None
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         Initializer::nop()
diff --git a/src/libstd/io/impls.rs b/src/libstd/io/impls.rs
index fe1179a3b4a18..4c5a94da3ac1e 100644
--- a/src/libstd/io/impls.rs
+++ b/src/libstd/io/impls.rs
@@ -23,6 +23,11 @@ impl<'a, R: Read + ?Sized> Read for &'a mut R {
         (**self).read(buf)
     }
 
+    #[inline]
+    fn size_snapshot(&self) -> Option<usize> {
+        (**self).size_snapshot()
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         (**self).initializer()
@@ -92,6 +97,11 @@ impl<R: Read + ?Sized> Read for Box<R> {
         (**self).read(buf)
     }
 
+    #[inline]
+    fn size_snapshot(&self) -> Option<usize> {
+        (**self).size_snapshot()
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         (**self).initializer()
@@ -181,6 +191,11 @@ impl<'a> Read for &'a [u8] {
         Ok(amt)
     }
 
+    #[inline]
+    fn size_snapshot(&self) -> Option<usize> {
+        Some(self.len())
+    }
+
     #[inline]
     unsafe fn initializer(&self) -> Initializer {
         Initializer::nop()
diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs
index b7a3695b47096..7459161dc9572 100644
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -366,6 +366,14 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
 fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
     let start_len = buf.len();
     let mut g = Guard { len: buf.len(), buf: buf };
+
+    // `size_snapshot` is only a hint: the source can grow or shrink after it
+    // is taken, so use it solely to pre-size the buffer and keep reading until
+    // `read` reports EOF. Reserve one extra byte so that the final `read`,
+    // which returns `Ok(0)`, doesn't force the buffer to grow.
+    if let Some(size) = r.size_snapshot() {
+        g.buf.reserve(size.saturating_add(1));
+    }
     let ret;
     loop {
         if g.len == g.buf.len() {
@@ -553,6 +561,21 @@ pub trait Read {
         Initializer::zeroing()
     }
 
+    /// Returns a snapshot of how many bytes would be read from this source until EOF
+    /// if read immediately, or `None` if that is unknown. Depending on the source, the
+    /// size may change at any time, so this isn't a guarantee that exactly that number
+    /// of bytes will actually be read.
+    ///
+    /// This is used by [`read_to_end`] and [`read_to_string`] to pre-allocate a memory buffer.
+    ///
+    /// [`read_to_end`]: #method.read_to_end
+    /// [`read_to_string`]: #method.read_to_string
+    #[unstable(feature = "read_size_snapshot", issue = /* FIXME */ "0")]
+    #[inline]
+    fn size_snapshot(&self) -> Option<usize> {
+        None
+    }
+
     /// Read all bytes until EOF in this source, placing them into `buf`.
     ///
     /// All bytes read from this source will be appended to the specified buffer
@@ -1729,6 +1752,19 @@ impl<T: Read, U: Read> Read for Chain<T, U> {
         self.second.read(buf)
     }
 
+    fn size_snapshot(&self) -> Option<usize> {
+        if self.done_first {
+            self.second.size_snapshot()
+        } else {
+            if let Some(second_size) = self.second.size_snapshot() {
+                if let Some(first_size) = self.first.size_snapshot() {
+                    return first_size.checked_add(second_size);
+                }
+            }
+            None
+        }
+    }
+
     unsafe fn initializer(&self) -> Initializer {
         let initializer = self.first.initializer();
         if initializer.should_initialize() {
@@ -1927,6 +1963,17 @@ impl<T: Read> Read for Take<T> {
         Ok(n)
     }
 
+    fn size_snapshot(&self) -> Option<usize> {
+        if let Some(inner_size) = self.inner.size_snapshot() {
+            let min = cmp::min(self.limit, inner_size as u64);
+            let size = min as usize;
+            if size as u64 == min {
+                return Some(size);
+            }
+        }
+        None
+    }
+
     unsafe fn initializer(&self) -> Initializer {
         self.inner.initializer()
     }