Skip to content

Commit 7a892ab

Browse files
committed
Auto merge of #117576 - the8472:fix-io-copy-vec, r=Mark-Simulacrum
Fix excessive initialization and reads beyond EOF in `io::copy(_, Vec<u8>)` specialization fixes #117545 and bczhc/bzip3-rs#8
2 parents fee5518 + 78aa5e5 commit 7a892ab

File tree

1 file changed

+54
-22
lines changed

1 file changed

+54
-22
lines changed

library/std/src/io/copy.rs

+54-22
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE};
22
use crate::alloc::Allocator;
33
use crate::cmp;
4+
use crate::cmp::min;
45
use crate::collections::VecDeque;
56
use crate::io::IoSlice;
67
use crate::mem::MaybeUninit;
@@ -263,36 +264,67 @@ impl<A: Allocator> BufferedWriterSpec for Vec<u8, A> {
263264
fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
264265
let mut bytes = 0;
265266

266-
// avoid allocating before we have determined that there's anything to read
267-
if self.capacity() == 0 {
268-
bytes = stack_buffer_copy(&mut reader.take(DEFAULT_BUF_SIZE as u64), self)?;
269-
if bytes == 0 {
270-
return Ok(0);
267+
// avoid inflating empty/small vecs before we have determined that there's anything to read
268+
if self.capacity() < DEFAULT_BUF_SIZE {
269+
let stack_read_limit = DEFAULT_BUF_SIZE as u64;
270+
bytes = stack_buffer_copy(&mut reader.take(stack_read_limit), self)?;
271+
// fewer bytes than requested -> EOF reached
272+
if bytes < stack_read_limit {
273+
return Ok(bytes);
271274
}
272275
}
273276

277+
// don't immediately offer the vec's whole spare capacity, otherwise
278+
// we might have to fully initialize it if the reader doesn't have a custom read_buf() impl
279+
let mut max_read_size = DEFAULT_BUF_SIZE;
280+
274281
loop {
275282
self.reserve(DEFAULT_BUF_SIZE);
276-
let mut buf: BorrowedBuf<'_> = self.spare_capacity_mut().into();
277-
match reader.read_buf(buf.unfilled()) {
278-
Ok(()) => {}
279-
Err(e) if e.is_interrupted() => continue,
280-
Err(e) => return Err(e),
281-
};
283+
let mut initialized_spare_capacity = 0;
282284

283-
let read = buf.filled().len();
284-
if read == 0 {
285-
break;
286-
}
285+
loop {
286+
let buf = self.spare_capacity_mut();
287+
let read_size = min(max_read_size, buf.len());
288+
let mut buf = BorrowedBuf::from(&mut buf[..read_size]);
289+
// SAFETY: init is either 0 or the init_len from the previous iteration.
290+
unsafe {
291+
buf.set_init(initialized_spare_capacity);
292+
}
293+
match reader.read_buf(buf.unfilled()) {
294+
Ok(()) => {
295+
let bytes_read = buf.len();
287296

288-
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
289-
// and the number of read bytes can't exceed the spare capacity since
290-
// that's what the buffer is borrowing from.
291-
unsafe { self.set_len(self.len() + read) };
292-
bytes += read as u64;
293-
}
297+
// EOF
298+
if bytes_read == 0 {
299+
return Ok(bytes);
300+
}
294301

295-
Ok(bytes)
302+
// the reader is returning short reads but it doesn't call ensure_init()
303+
if buf.init_len() < buf.capacity() {
304+
max_read_size = usize::MAX;
305+
}
306+
// the reader hasn't returned short reads so far
307+
if bytes_read == buf.capacity() {
308+
max_read_size *= 2;
309+
}
310+
311+
initialized_spare_capacity = buf.init_len() - bytes_read;
312+
bytes += bytes_read as u64;
313+
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
314+
// and the number of read bytes can't exceed the spare capacity since
315+
// that's what the buffer is borrowing from.
316+
unsafe { self.set_len(self.len() + bytes_read) };
317+
318+
// spare capacity full, reserve more
319+
if self.len() == self.capacity() {
320+
break;
321+
}
322+
}
323+
Err(e) if e.is_interrupted() => continue,
324+
Err(e) => return Err(e),
325+
}
326+
}
327+
}
296328
}
297329
}
298330

0 commit comments

Comments
 (0)