Skip to content

Commit 3b263ce

Browse files
committed
Auto merge of #81156 - DrMeepster:read_buf, r=joshtriplett
Implement most of RFC 2930, providing the ReadBuf abstraction

This replaces the `Initializer` abstraction for permitting reading into uninitialized buffers, closing #42788.

This leaves several APIs described in the RFC out of scope for the initial implementation:

* `read_buf_vectored`
* `ReadBufs`

Closes #42788 by removing the relevant APIs.
2 parents 600820d + cd23799 commit 3b263ce

File tree

25 files changed

+899
-289
lines changed

25 files changed

+899
-289
lines changed

Diff for: library/std/src/fs.rs

+9-13
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod tests;
1313

1414
use crate::ffi::OsString;
1515
use crate::fmt;
16-
use crate::io::{self, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
16+
use crate::io::{self, IoSlice, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, Write};
1717
use crate::path::{Path, PathBuf};
1818
use crate::sys::fs as fs_imp;
1919
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
@@ -623,15 +623,13 @@ impl Read for File {
623623
self.inner.read_vectored(bufs)
624624
}
625625

626-
#[inline]
627-
fn is_read_vectored(&self) -> bool {
628-
self.inner.is_read_vectored()
626+
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
627+
self.inner.read_buf(buf)
629628
}
630629

631630
#[inline]
632-
unsafe fn initializer(&self) -> Initializer {
633-
// SAFETY: Read is guaranteed to work on uninitialized memory
634-
unsafe { Initializer::nop() }
631+
fn is_read_vectored(&self) -> bool {
632+
self.inner.is_read_vectored()
635633
}
636634

637635
// Reserves space in the buffer based on the file size when available.
@@ -677,6 +675,10 @@ impl Read for &File {
677675
self.inner.read(buf)
678676
}
679677

678+
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
679+
self.inner.read_buf(buf)
680+
}
681+
680682
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
681683
self.inner.read_vectored(bufs)
682684
}
@@ -686,12 +688,6 @@ impl Read for &File {
686688
self.inner.is_read_vectored()
687689
}
688690

689-
#[inline]
690-
unsafe fn initializer(&self) -> Initializer {
691-
// SAFETY: Read is guaranteed to work on uninitialized memory
692-
unsafe { Initializer::nop() }
693-
}
694-
695691
// Reserves space in the buffer based on the file size when available.
696692
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
697693
buf.reserve(buffer_capacity_required(self));

Diff for: library/std/src/io/buffered/bufreader.rs

+43-16
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use crate::cmp;
22
use crate::fmt;
33
use crate::io::{
4-
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
4+
self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
55
};
6+
use crate::mem::MaybeUninit;
67

78
/// The `BufReader<R>` struct adds buffering to any reader.
89
///
@@ -47,9 +48,10 @@ use crate::io::{
4748
#[stable(feature = "rust1", since = "1.0.0")]
4849
pub struct BufReader<R> {
4950
inner: R,
50-
buf: Box<[u8]>,
51+
buf: Box<[MaybeUninit<u8>]>,
5152
pos: usize,
5253
cap: usize,
54+
init: usize,
5355
}
5456

5557
impl<R: Read> BufReader<R> {
@@ -91,11 +93,8 @@ impl<R: Read> BufReader<R> {
9193
/// ```
9294
#[stable(feature = "rust1", since = "1.0.0")]
9395
pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
94-
unsafe {
95-
let mut buf = Box::new_uninit_slice(capacity).assume_init();
96-
inner.initializer().initialize(&mut buf);
97-
BufReader { inner, buf, pos: 0, cap: 0 }
98-
}
96+
let buf = Box::new_uninit_slice(capacity);
97+
BufReader { inner, buf, pos: 0, cap: 0, init: 0 }
9998
}
10099
}
101100

@@ -171,7 +170,8 @@ impl<R> BufReader<R> {
171170
/// ```
172171
#[stable(feature = "bufreader_buffer", since = "1.37.0")]
173172
pub fn buffer(&self) -> &[u8] {
174-
&self.buf[self.pos..self.cap]
173+
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
174+
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) }
175175
}
176176

177177
/// Returns the number of bytes the internal buffer can hold at once.
@@ -271,6 +271,25 @@ impl<R: Read> Read for BufReader<R> {
271271
Ok(nread)
272272
}
273273

274+
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
275+
// If we don't have any buffered data and we're doing a massive read
276+
// (larger than our internal buffer), bypass our internal buffer
277+
// entirely.
278+
if self.pos == self.cap && buf.remaining() >= self.buf.len() {
279+
self.discard_buffer();
280+
return self.inner.read_buf(buf);
281+
}
282+
283+
let prev = buf.filled_len();
284+
285+
let mut rem = self.fill_buf()?;
286+
rem.read_buf(buf)?;
287+
288+
self.consume(buf.filled_len() - prev); // the slice impl of read_buf is known to never unfill buf
289+
290+
Ok(())
291+
}
292+
274293
// Small read_exacts from a BufReader are extremely common when used with a deserializer.
275294
// The default implementation calls read in a loop, which results in surprisingly poor code
276295
// generation for the common path where the buffer has enough bytes to fill the passed-in
@@ -303,16 +322,11 @@ impl<R: Read> Read for BufReader<R> {
303322
self.inner.is_read_vectored()
304323
}
305324

306-
// we can't skip unconditionally because of the large buffer case in read.
307-
unsafe fn initializer(&self) -> Initializer {
308-
self.inner.initializer()
309-
}
310-
311325
// The inner reader might have an optimized `read_to_end`. Drain our buffer and then
312326
// delegate to the inner implementation.
313327
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
314328
let nread = self.cap - self.pos;
315-
buf.extend_from_slice(&self.buf[self.pos..self.cap]);
329+
buf.extend_from_slice(&self.buffer());
316330
self.discard_buffer();
317331
Ok(nread + self.inner.read_to_end(buf)?)
318332
}
@@ -363,10 +377,23 @@ impl<R: Read> BufRead for BufReader<R> {
363377
// to tell the compiler that the pos..cap slice is always valid.
364378
if self.pos >= self.cap {
365379
debug_assert!(self.pos == self.cap);
366-
self.cap = self.inner.read(&mut self.buf)?;
380+
381+
let mut readbuf = ReadBuf::uninit(&mut self.buf);
382+
383+
// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
384+
// from the last time this function was called
385+
unsafe {
386+
readbuf.assume_init(self.init);
387+
}
388+
389+
self.inner.read_buf(&mut readbuf)?;
390+
391+
self.cap = readbuf.filled_len();
392+
self.init = readbuf.initialized_len();
393+
367394
self.pos = 0;
368395
}
369-
Ok(&self.buf[self.pos..self.cap])
396+
Ok(self.buffer())
370397
}
371398

372399
fn consume(&mut self, amt: usize) {

Diff for: library/std/src/io/buffered/tests.rs

+51-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::io::prelude::*;
2-
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, SeekFrom};
2+
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, ReadBuf, SeekFrom};
3+
use crate::mem::MaybeUninit;
34
use crate::panic;
45
use crate::sync::atomic::{AtomicUsize, Ordering};
56
use crate::thread;
@@ -55,6 +56,55 @@ fn test_buffered_reader() {
5556
assert_eq!(reader.read(&mut buf).unwrap(), 0);
5657
}
5758

59+
#[test]
60+
fn test_buffered_reader_read_buf() {
61+
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
62+
let mut reader = BufReader::with_capacity(2, inner);
63+
64+
let mut buf = [MaybeUninit::uninit(); 3];
65+
let mut buf = ReadBuf::uninit(&mut buf);
66+
67+
reader.read_buf(&mut buf).unwrap();
68+
69+
assert_eq!(buf.filled(), [5, 6, 7]);
70+
assert_eq!(reader.buffer(), []);
71+
72+
let mut buf = [MaybeUninit::uninit(); 2];
73+
let mut buf = ReadBuf::uninit(&mut buf);
74+
75+
reader.read_buf(&mut buf).unwrap();
76+
77+
assert_eq!(buf.filled(), [0, 1]);
78+
assert_eq!(reader.buffer(), []);
79+
80+
let mut buf = [MaybeUninit::uninit(); 1];
81+
let mut buf = ReadBuf::uninit(&mut buf);
82+
83+
reader.read_buf(&mut buf).unwrap();
84+
85+
assert_eq!(buf.filled(), [2]);
86+
assert_eq!(reader.buffer(), [3]);
87+
88+
let mut buf = [MaybeUninit::uninit(); 3];
89+
let mut buf = ReadBuf::uninit(&mut buf);
90+
91+
reader.read_buf(&mut buf).unwrap();
92+
93+
assert_eq!(buf.filled(), [3]);
94+
assert_eq!(reader.buffer(), []);
95+
96+
reader.read_buf(&mut buf).unwrap();
97+
98+
assert_eq!(buf.filled(), [3, 4]);
99+
assert_eq!(reader.buffer(), []);
100+
101+
buf.clear();
102+
103+
reader.read_buf(&mut buf).unwrap();
104+
105+
assert_eq!(buf.filled_len(), 0);
106+
}
107+
58108
#[test]
59109
fn test_buffered_reader_seek() {
60110
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];

Diff for: library/std/src/io/copy.rs

+38-43
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::{BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
1+
use super::{BufWriter, ErrorKind, Read, ReadBuf, Result, Write, DEFAULT_BUF_SIZE};
22
use crate::mem::MaybeUninit;
33

44
/// Copies the entire contents of a reader into a writer.
@@ -82,33 +82,30 @@ impl<I: Write> BufferedCopySpec for BufWriter<I> {
8282
return stack_buffer_copy(reader, writer);
8383
}
8484

85-
// FIXME: #42788
86-
//
87-
// - This creates a (mut) reference to a slice of
88-
// _uninitialized_ integers, which is **undefined behavior**
89-
//
90-
// - Only the standard library gets to soundly "ignore" this,
91-
// based on its privileged knowledge of unstable rustc
92-
// internals;
93-
unsafe {
94-
let spare_cap = writer.buffer_mut().spare_capacity_mut();
95-
reader.initializer().initialize(MaybeUninit::slice_assume_init_mut(spare_cap));
96-
}
97-
9885
let mut len = 0;
86+
let mut init = 0;
9987

10088
loop {
10189
let buf = writer.buffer_mut();
102-
let spare_cap = buf.spare_capacity_mut();
103-
104-
if spare_cap.len() >= DEFAULT_BUF_SIZE {
105-
match reader.read(unsafe { MaybeUninit::slice_assume_init_mut(spare_cap) }) {
106-
Ok(0) => return Ok(len), // EOF reached
107-
Ok(bytes_read) => {
108-
assert!(bytes_read <= spare_cap.len());
109-
// SAFETY: The initializer contract guarantees that either it or `read`
110-
// will have initialized these bytes. And we just checked that the number
111-
// of bytes is within the buffer capacity.
90+
let mut read_buf = ReadBuf::uninit(buf.spare_capacity_mut());
91+
92+
// SAFETY: init is either 0 or the initialized_len of the previous iteration
93+
unsafe {
94+
read_buf.assume_init(init);
95+
}
96+
97+
if read_buf.capacity() >= DEFAULT_BUF_SIZE {
98+
match reader.read_buf(&mut read_buf) {
99+
Ok(()) => {
100+
let bytes_read = read_buf.filled_len();
101+
102+
if bytes_read == 0 {
103+
return Ok(len);
104+
}
105+
106+
init = read_buf.initialized_len() - bytes_read;
107+
108+
// SAFETY: ReadBuf guarantees all of its filled bytes are init
112109
unsafe { buf.set_len(buf.len() + bytes_read) };
113110
len += bytes_read as u64;
114111
// Read again if the buffer still has enough capacity, as BufWriter itself would do
@@ -129,28 +126,26 @@ fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
129126
reader: &mut R,
130127
writer: &mut W,
131128
) -> Result<u64> {
132-
let mut buf = MaybeUninit::<[u8; DEFAULT_BUF_SIZE]>::uninit();
133-
// FIXME: #42788
134-
//
135-
// - This creates a (mut) reference to a slice of
136-
// _uninitialized_ integers, which is **undefined behavior**
137-
//
138-
// - Only the standard library gets to soundly "ignore" this,
139-
// based on its privileged knowledge of unstable rustc
140-
// internals;
141-
unsafe {
142-
reader.initializer().initialize(buf.assume_init_mut());
143-
}
129+
let mut buf = [MaybeUninit::uninit(); DEFAULT_BUF_SIZE];
130+
let mut buf = ReadBuf::uninit(&mut buf);
131+
132+
let mut len = 0;
144133

145-
let mut written = 0;
146134
loop {
147-
let len = match reader.read(unsafe { buf.assume_init_mut() }) {
148-
Ok(0) => return Ok(written),
149-
Ok(len) => len,
150-
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
135+
match reader.read_buf(&mut buf) {
136+
Ok(()) => {}
137+
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
151138
Err(e) => return Err(e),
152139
};
153-
writer.write_all(unsafe { &buf.assume_init_ref()[..len] })?;
154-
written += len as u64;
140+
141+
if buf.filled().is_empty() {
142+
break;
143+
}
144+
145+
len += buf.filled().len() as u64;
146+
writer.write_all(buf.filled())?;
147+
buf.clear();
155148
}
149+
150+
Ok(len)
156151
}

Diff for: library/std/src/io/cursor.rs

+11-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ mod tests;
44
use crate::io::prelude::*;
55

66
use crate::cmp;
7-
use crate::io::{self, Error, ErrorKind, Initializer, IoSlice, IoSliceMut, SeekFrom};
7+
use crate::io::{self, Error, ErrorKind, IoSlice, IoSliceMut, ReadBuf, SeekFrom};
88

99
use core::convert::TryInto;
1010

@@ -324,6 +324,16 @@ where
324324
Ok(n)
325325
}
326326

327+
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
328+
let prev_filled = buf.filled_len();
329+
330+
Read::read_buf(&mut self.fill_buf()?, buf)?;
331+
332+
self.pos += (buf.filled_len() - prev_filled) as u64;
333+
334+
Ok(())
335+
}
336+
327337
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
328338
let mut nread = 0;
329339
for buf in bufs {
@@ -346,11 +356,6 @@ where
346356
self.pos += n as u64;
347357
Ok(())
348358
}
349-
350-
#[inline]
351-
unsafe fn initializer(&self) -> Initializer {
352-
Initializer::nop()
353-
}
354359
}
355360

356361
#[stable(feature = "rust1", since = "1.0.0")]

0 commit comments

Comments
 (0)