Skip to content

Commit ecbb896

Browse files
committed
Add Read::initializer.
This is an API that allows types to indicate that they can be passed buffers of uninitialized memory, which can improve performance.
1 parent 4450779 commit ecbb896

File tree

28 files changed

+222
-269
lines changed

28 files changed

+222
-269
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# `read_initializer`
2+
3+
The tracking issue for this feature is: [#42788]
4+
5+
[#42788]: https://github.com/rust-lang/rust/issues/42788
6+
7+
------------------------

src/libstd/fs.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
use fmt;
2121
use ffi::OsString;
22-
use io::{self, SeekFrom, Seek, Read, Write};
22+
use io::{self, SeekFrom, Seek, Read, Initializer, Write};
2323
use path::{Path, PathBuf};
2424
use sys::fs as fs_imp;
2525
use sys_common::{AsInnerMut, FromInner, AsInner, IntoInner};
@@ -446,8 +446,10 @@ impl Read for File {
446446
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
447447
self.inner.read(buf)
448448
}
449-
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
450-
self.inner.read_to_end(buf)
449+
450+
#[inline]
451+
unsafe fn initializer(&self) -> Initializer {
452+
Initializer::nop()
451453
}
452454
}
453455
#[stable(feature = "rust1", since = "1.0.0")]
@@ -468,8 +470,10 @@ impl<'a> Read for &'a File {
468470
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
469471
self.inner.read(buf)
470472
}
471-
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
472-
self.inner.read_to_end(buf)
473+
474+
#[inline]
475+
unsafe fn initializer(&self) -> Initializer {
476+
Initializer::nop()
473477
}
474478
}
475479
#[stable(feature = "rust1", since = "1.0.0")]

src/libstd/io/buffered.rs

+16-6
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use io::prelude::*;
1515
use cmp;
1616
use error;
1717
use fmt;
18-
use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind, SeekFrom};
18+
use io::{self, Initializer, DEFAULT_BUF_SIZE, Error, ErrorKind, SeekFrom};
1919
use memchr;
2020

2121
/// The `BufReader` struct adds buffering to any reader.
@@ -92,11 +92,16 @@ impl<R: Read> BufReader<R> {
9292
/// ```
9393
#[stable(feature = "rust1", since = "1.0.0")]
9494
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
95-
BufReader {
96-
inner: inner,
97-
buf: vec![0; cap].into_boxed_slice(),
98-
pos: 0,
99-
cap: 0,
95+
unsafe {
96+
let mut buffer = Vec::with_capacity(cap);
97+
buffer.set_len(cap);
98+
inner.initializer().initialize(&mut buffer);
99+
BufReader {
100+
inner: inner,
101+
buf: buffer.into_boxed_slice(),
102+
pos: 0,
103+
cap: 0,
104+
}
100105
}
101106
}
102107

@@ -180,6 +185,11 @@ impl<R: Read> Read for BufReader<R> {
180185
self.consume(nread);
181186
Ok(nread)
182187
}
188+
189+
// We can't skip initialization unconditionally because of the large buffer case in `read`, so defer to the inner reader's initializer.
190+
unsafe fn initializer(&self) -> Initializer {
191+
self.inner.initializer()
192+
}
183193
}
184194

185195
#[stable(feature = "rust1", since = "1.0.0")]

src/libstd/io/cursor.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use io::prelude::*;
1212

1313
use core::convert::TryInto;
1414
use cmp;
15-
use io::{self, SeekFrom, Error, ErrorKind};
15+
use io::{self, Initializer, SeekFrom, Error, ErrorKind};
1616

1717
/// A `Cursor` wraps another type and provides it with a
1818
/// [`Seek`] implementation.
@@ -229,6 +229,11 @@ impl<T> Read for Cursor<T> where T: AsRef<[u8]> {
229229
self.pos += n as u64;
230230
Ok(n)
231231
}
232+
233+
#[inline]
234+
unsafe fn initializer(&self) -> Initializer {
235+
Initializer::nop()
236+
}
232237
}
233238

234239
#[stable(feature = "rust1", since = "1.0.0")]

src/libstd/io/impls.rs

+16-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// except according to those terms.
1010

1111
use cmp;
12-
use io::{self, SeekFrom, Read, Write, Seek, BufRead, Error, ErrorKind};
12+
use io::{self, SeekFrom, Read, Initializer, Write, Seek, BufRead, Error, ErrorKind};
1313
use fmt;
1414
use mem;
1515

@@ -23,6 +23,11 @@ impl<'a, R: Read + ?Sized> Read for &'a mut R {
2323
(**self).read(buf)
2424
}
2525

26+
#[inline]
27+
unsafe fn initializer(&self) -> Initializer {
28+
(**self).initializer()
29+
}
30+
2631
#[inline]
2732
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
2833
(**self).read_to_end(buf)
@@ -87,6 +92,11 @@ impl<R: Read + ?Sized> Read for Box<R> {
8792
(**self).read(buf)
8893
}
8994

95+
#[inline]
96+
unsafe fn initializer(&self) -> Initializer {
97+
(**self).initializer()
98+
}
99+
90100
#[inline]
91101
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
92102
(**self).read_to_end(buf)
@@ -171,6 +181,11 @@ impl<'a> Read for &'a [u8] {
171181
Ok(amt)
172182
}
173183

184+
#[inline]
185+
unsafe fn initializer(&self) -> Initializer {
186+
Initializer::nop()
187+
}
188+
174189
#[inline]
175190
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
176191
if buf.len() > self.len() {

src/libstd/io/mod.rs

+109-19
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ use fmt;
275275
use result;
276276
use str;
277277
use memchr;
278+
use ptr;
278279

279280
#[stable(feature = "rust1", since = "1.0.0")]
280281
pub use self::buffered::{BufReader, BufWriter, LineWriter};
@@ -292,7 +293,7 @@ pub use self::stdio::{stdin, stdout, stderr, Stdin, Stdout, Stderr};
292293
pub use self::stdio::{StdoutLock, StderrLock, StdinLock};
293294
#[unstable(feature = "print_internals", issue = "0")]
294295
pub use self::stdio::{_print, _eprint};
295-
#[unstable(feature = "libstd_io_internals", issue = "0")]
296+
#[unstable(feature = "libstd_io_internals", issue = "42788")]
296297
#[doc(no_inline, hidden)]
297298
pub use self::stdio::{set_panic, set_print};
298299

@@ -307,6 +308,14 @@ mod stdio;
307308

308309
const DEFAULT_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE;
309310

311+
struct Guard<'a> { buf: &'a mut Vec<u8>, len: usize }
312+
313+
impl<'a> Drop for Guard<'a> {
314+
fn drop(&mut self) {
315+
unsafe { self.buf.set_len(self.len); }
316+
}
317+
}
318+
310319
// A few methods below (read_to_string, read_line) will append data into a
311320
// `String` buffer, but we need to be pretty careful when doing this. The
312321
// implementation will just call `.as_mut_vec()` and then delegate to a
@@ -328,23 +337,16 @@ const DEFAULT_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE;
328337
fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
329338
where F: FnOnce(&mut Vec<u8>) -> Result<usize>
330339
{
331-
struct Guard<'a> { s: &'a mut Vec<u8>, len: usize }
332-
impl<'a> Drop for Guard<'a> {
333-
fn drop(&mut self) {
334-
unsafe { self.s.set_len(self.len); }
335-
}
336-
}
337-
338340
unsafe {
339-
let mut g = Guard { len: buf.len(), s: buf.as_mut_vec() };
340-
let ret = f(g.s);
341-
if str::from_utf8(&g.s[g.len..]).is_err() {
341+
let mut g = Guard { len: buf.len(), buf: buf.as_mut_vec() };
342+
let ret = f(g.buf);
343+
if str::from_utf8(&g.buf[g.len..]).is_err() {
342344
ret.and_then(|_| {
343345
Err(Error::new(ErrorKind::InvalidData,
344346
"stream did not contain valid UTF-8"))
345347
})
346348
} else {
347-
g.len = g.s.len();
349+
g.len = g.buf.len();
348350
ret
349351
}
350352
}
@@ -356,25 +358,32 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
356358
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
357359
// time is 4,500 times (!) slower than this if the reader has a very small
358360
// amount of data to return.
361+
//
362+
// Because we're extending the buffer with uninitialized data for trusted
363+
// readers, we need to make sure to truncate that if any of this panics.
359364
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
360365
let start_len = buf.len();
361-
let mut len = start_len;
366+
let mut g = Guard { len: buf.len(), buf: buf };
362367
let mut new_write_size = 16;
363368
let ret;
364369
loop {
365-
if len == buf.len() {
370+
if g.len == g.buf.len() {
366371
if new_write_size < DEFAULT_BUF_SIZE {
367372
new_write_size *= 2;
368373
}
369-
buf.resize(len + new_write_size, 0);
374+
unsafe {
375+
g.buf.reserve(new_write_size);
376+
g.buf.set_len(g.len + new_write_size);
377+
r.initializer().initialize(&mut g.buf[g.len..]);
378+
}
370379
}
371380

372-
match r.read(&mut buf[len..]) {
381+
match r.read(&mut g.buf[g.len..]) {
373382
Ok(0) => {
374-
ret = Ok(len - start_len);
383+
ret = Ok(g.len - start_len);
375384
break;
376385
}
377-
Ok(n) => len += n,
386+
Ok(n) => g.len += n,
378387
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
379388
Err(e) => {
380389
ret = Err(e);
@@ -383,7 +392,6 @@ fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize>
383392
}
384393
}
385394

386-
buf.truncate(len);
387395
ret
388396
}
389397

@@ -494,6 +502,31 @@ pub trait Read {
494502
#[stable(feature = "rust1", since = "1.0.0")]
495503
fn read(&mut self, buf: &mut [u8]) -> Result<usize>;
496504

505+
/// Determines if this `Read`er can work with buffers of uninitialized
506+
/// memory.
507+
///
508+
/// The default implementation returns an initializer which will zero
509+
/// buffers.
510+
///
511+
/// If a `Read`er guarantees that it can work properly with uninitialized
512+
/// memory, it should call `Initializer::nop()`. See the documentation for
513+
/// `Initializer` for details.
514+
///
515+
/// The behavior of this method must be independent of the state of the
516+
/// `Read`er - the method only takes `&self` so that it can be used through
517+
/// trait objects.
518+
///
519+
/// # Unsafety
520+
///
521+
/// This method is unsafe because a `Read`er could otherwise return a
522+
/// non-zeroing `Initializer` from another `Read` type without an `unsafe`
523+
/// block.
524+
#[unstable(feature = "read_initializer", issue = "42788")]
525+
#[inline]
526+
unsafe fn initializer(&self) -> Initializer {
527+
Initializer::zeroing()
528+
}
529+
497530
/// Read all bytes until EOF in this source, placing them into `buf`.
498531
///
499532
/// All bytes read from this source will be appended to the specified buffer
@@ -829,6 +862,50 @@ pub trait Read {
829862
}
830863
}
831864

865+
/// A type used to conditionally initialize buffers passed to `Read` methods.
866+
#[unstable(feature = "read_initializer", issue = "42788")]
867+
#[derive(Debug)]
868+
pub struct Initializer(bool);
869+
870+
impl Initializer {
871+
/// Returns a new `Initializer` which will zero out buffers.
872+
#[unstable(feature = "read_initializer", issue = "42788")]
873+
#[inline]
874+
pub fn zeroing() -> Initializer {
875+
Initializer(true)
876+
}
877+
878+
/// Returns a new `Initializer` which will not zero out buffers.
879+
///
880+
/// # Unsafety
881+
///
882+
/// This may only be called by `Read`ers which guarantee that they will not
883+
/// read from buffers passed to `Read` methods, and that the return value of
884+
/// the method accurately reflects the number of bytes that have been
885+
/// written to the head of the buffer.
886+
#[unstable(feature = "read_initializer", issue = "42788")]
887+
#[inline]
888+
pub unsafe fn nop() -> Initializer {
889+
Initializer(false)
890+
}
891+
892+
/// Indicates if a buffer should be initialized.
893+
#[unstable(feature = "read_initializer", issue = "42788")]
894+
#[inline]
895+
pub fn should_initialize(&self) -> bool {
896+
self.0
897+
}
898+
899+
/// Initializes a buffer if necessary.
900+
#[unstable(feature = "read_initializer", issue = "42788")]
901+
#[inline]
902+
pub fn initialize(&self, buf: &mut [u8]) {
903+
if self.should_initialize() {
904+
unsafe { ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()) }
905+
}
906+
}
907+
}
908+
832909
/// A trait for objects which are byte-oriented sinks.
833910
///
834911
/// Implementors of the `Write` trait are sometimes called 'writers'.
@@ -1608,6 +1685,15 @@ impl<T: Read, U: Read> Read for Chain<T, U> {
16081685
}
16091686
self.second.read(buf)
16101687
}
1688+
1689+
unsafe fn initializer(&self) -> Initializer {
1690+
let initializer = self.first.initializer();
1691+
if initializer.should_initialize() {
1692+
initializer
1693+
} else {
1694+
self.second.initializer()
1695+
}
1696+
}
16111697
}
16121698

16131699
#[stable(feature = "chain_bufread", since = "1.9.0")]
@@ -1772,6 +1858,10 @@ impl<T: Read> Read for Take<T> {
17721858
self.limit -= n as u64;
17731859
Ok(n)
17741860
}
1861+
1862+
unsafe fn initializer(&self) -> Initializer {
1863+
self.inner.initializer()
1864+
}
17751865
}
17761866

17771867
#[stable(feature = "rust1", since = "1.0.0")]

0 commit comments

Comments
 (0)