From 972f2e585528515eaab4bee7270acda7f7375398 Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Mon, 24 Mar 2014 23:22:23 -0700 Subject: [PATCH] io: Add .read_at_least() to Reader Reader.read_at_least() ensures that at least a given number of bytes have been read. The most common use-case for this is ensuring at least 1 byte has been read. If the reader returns 0 enough times in a row, a new error kind NoProgress will be returned instead of looping infinitely. This change is necessary in order to properly support Readers that repeatedly return 0, either because they're broken, or because they're attempting to do a non-blocking read on some resource that never becomes available. Also add .push() and .push_at_least() methods. push() is like read() but the results are appended to the passed Vec. Remove Reader.fill() and Reader.push_exact() as they end up being thin wrappers around read_at_least() and push_at_least(). [breaking-change] --- src/librand/reader.rs | 4 +- src/libstd/io/extensions.rs | 36 ++--- src/libstd/io/mem.rs | 12 +- src/libstd/io/mod.rs | 285 ++++++++++++++++++++++++++++-------- 4 files changed, 253 insertions(+), 84 deletions(-) diff --git a/src/librand/reader.rs b/src/librand/reader.rs index 184146d9a29c8..e800f64463b4e 100644 --- a/src/librand/reader.rs +++ b/src/librand/reader.rs @@ -60,8 +60,8 @@ impl Rng for ReaderRng { } fn fill_bytes(&mut self, v: &mut [u8]) { if v.len() == 0 { return } - match self.reader.fill(v) { - Ok(()) => {} + match self.reader.read_at_least(v.len(), v) { + Ok(_) => {} Err(e) => fail!("ReaderRng.fill_bytes error: {}", e) } } diff --git a/src/libstd/io/extensions.rs b/src/libstd/io/extensions.rs index bb4bd50815a90..29afd2b1d9b2b 100644 --- a/src/libstd/io/extensions.rs +++ b/src/libstd/io/extensions.rs @@ -342,39 +342,39 @@ mod test { } #[test] - fn push_exact() { - let mut reader = MemReader::new(vec!(10, 11, 12, 13)); - let mut buf = vec!(8, 9); - reader.push_exact(&mut buf, 4).unwrap(); - assert!(buf == vec!(8, 9, 
10, 11, 12, 13)); + fn push_at_least() { + let mut reader = MemReader::new(vec![10, 11, 12, 13]); + let mut buf = vec![8, 9]; + assert!(reader.push_at_least(4, 4, &mut buf).is_ok()); + assert!(buf == vec![8, 9, 10, 11, 12, 13]); } #[test] - fn push_exact_partial() { + fn push_at_least_partial() { let mut reader = PartialReader { count: 0, }; - let mut buf = vec!(8, 9); - reader.push_exact(&mut buf, 4).unwrap(); - assert!(buf == vec!(8, 9, 10, 11, 12, 13)); + let mut buf = vec![8, 9]; + assert!(reader.push_at_least(4, 4, &mut buf).is_ok()); + assert!(buf == vec![8, 9, 10, 11, 12, 13]); } #[test] - fn push_exact_eof() { - let mut reader = MemReader::new(vec!(10, 11)); - let mut buf = vec!(8, 9); - assert!(reader.push_exact(&mut buf, 4).is_err()); - assert!(buf == vec!(8, 9, 10, 11)); + fn push_at_least_eof() { + let mut reader = MemReader::new(vec![10, 11]); + let mut buf = vec![8, 9]; + assert!(reader.push_at_least(4, 4, &mut buf).is_err()); + assert!(buf == vec![8, 9, 10, 11]); } #[test] - fn push_exact_error() { + fn push_at_least_error() { let mut reader = ErroringLaterReader { count: 0, }; - let mut buf = vec!(8, 9); - assert!(reader.push_exact(&mut buf, 4).is_err()); - assert!(buf == vec!(8, 9, 10)); + let mut buf = vec![8, 9]; + assert!(reader.push_at_least(4, 4, &mut buf).is_err()); + assert!(buf == vec![8, 9, 10]); } #[test] diff --git a/src/libstd/io/mem.rs b/src/libstd/io/mem.rs index 9abe1bee9a32d..291b4f948416f 100644 --- a/src/libstd/io/mem.rs +++ b/src/libstd/io/mem.rs @@ -559,16 +559,16 @@ mod test { } #[test] - fn io_fill() { - let mut r = MemReader::new(vec!(1, 2, 3, 4, 5, 6, 7, 8)); + fn io_read_at_least() { + let mut r = MemReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8]); let mut buf = [0, ..3]; - assert_eq!(r.fill(buf), Ok(())); + assert!(r.read_at_least(buf.len(), buf).is_ok()); assert_eq!(buf.as_slice(), &[1, 2, 3]); - assert_eq!(r.fill(buf.mut_slice_to(0)), Ok(())); + assert!(r.read_at_least(0, buf.mut_slice_to(0)).is_ok()); 
assert_eq!(buf.as_slice(), &[1, 2, 3]); - assert_eq!(r.fill(buf), Ok(())); + assert!(r.read_at_least(buf.len(), buf).is_ok()); assert_eq!(buf.as_slice(), &[4, 5, 6]); - assert!(r.fill(buf).is_err()); + assert!(r.read_at_least(buf.len(), buf).is_err()); assert_eq!(buf.as_slice(), &[7, 8, 6]); } } diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 0f8e37b4ee011..b654e278c3f8c 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -229,7 +229,6 @@ use slice::{Vector, MutableVector, ImmutableVector}; use str::{StrSlice, StrAllocating}; use str; use uint; -use unstable::finally::try_finally; use vec::Vec; // Reexports @@ -442,8 +441,12 @@ pub enum IoErrorKind { /// The payload contained as part of this variant is the number of bytes /// which are known to have been successfully written. ShortWrite(uint), + /// The Reader returned 0 bytes from `read()` too many times. + NoProgress, } +static NO_PROGRESS_LIMIT: uint = 1000; + /// A trait for objects which are byte-oriented streams. Readers are defined by /// one method, `read`. This function will block until data is available, /// filling in the provided buffer with any data read. @@ -456,7 +459,7 @@ pub trait Reader { // Only method which need to get implemented for this trait /// Read bytes, up to the length of `buf` and place them in `buf`. - /// Returns the number of bytes read. The number of bytes read my + /// Returns the number of bytes read. The number of bytes read may /// be less than the number requested, even 0. Returns `Err` on EOF. /// /// # Error @@ -465,74 +468,113 @@ pub trait Reader { /// `Err(IoError)`. Note that end-of-file is considered an error, and can be /// inspected for in the error's `kind` field. Also note that reading 0 /// bytes is not considered an error in all circumstances + /// + /// # Implementation Note + /// + /// When implementing this method on a new Reader, you are strongly encouraged + /// not to return 0 if you can avoid it. 
fn read(&mut self, buf: &mut [u8]) -> IoResult; // Convenient helper methods based on the above methods - /// Reads a single byte. Returns `Err` on EOF. - fn read_byte(&mut self) -> IoResult { - let mut buf = [0]; - loop { - match self.read(buf) { - Ok(0) => {} - Ok(1) => return Ok(buf[0]), - Ok(_) => unreachable!(), - Err(e) => return Err(e) - } - } - } - - /// Fills the provided slice with bytes from this reader + /// Reads at least `min` bytes and places them in `buf`. + /// Returns the number of bytes read. /// - /// This will continue to call `read` until the slice has been completely - /// filled with bytes. + /// This will continue to call `read` until at least `min` bytes have been + /// read. If `read` returns 0 too many times, `NoProgress` will be + /// returned. /// /// # Error /// /// If an error occurs at any point, that error is returned, and no further /// bytes are read. - fn fill(&mut self, buf: &mut [u8]) -> IoResult<()> { + fn read_at_least(&mut self, min: uint, buf: &mut [u8]) -> IoResult { + if min > buf.len() { + return Err(IoError { detail: Some("the buffer is too short".to_owned()), + ..standard_error(InvalidInput) }); + } let mut read = 0; - while read < buf.len() { - read += try!(self.read(buf.mut_slice_from(read))); + while read < min { + let mut zeroes = 0; + loop { + match self.read(buf.mut_slice_from(read)) { + Ok(0) => { + zeroes += 1; + if zeroes >= NO_PROGRESS_LIMIT { + return Err(standard_error(NoProgress)); + } + } + Ok(n) => { + read += n; + break; + } + err@Err(_) => return err + } + } } - Ok(()) + Ok(read) + } + + /// Reads a single byte. Returns `Err` on EOF. + fn read_byte(&mut self) -> IoResult { + let mut buf = [0]; + try!(self.read_at_least(1, buf)); + Ok(buf[0]) } - /// Reads exactly `len` bytes and appends them to a vector. + /// Reads up to `len` bytes and appends them to a vector. + /// Returns the number of bytes read. The number of bytes read may be + /// less than the number requested, even 0. Returns Err on EOF. 
+ /// + /// # Error /// - /// May push fewer than the requested number of bytes on error - /// or EOF. If `Ok(())` is returned, then all of the requested bytes were - /// pushed on to the vector, otherwise the amount `len` bytes couldn't be - /// read (an error was encountered), and the error is returned. - fn push_exact(&mut self, buf: &mut Vec, len: uint) -> IoResult<()> { - struct State<'a> { - buf: &'a mut Vec, - total_read: uint + /// If an error occurs during this I/O operation, then it is returned + /// as `Err(IoError)`. See `read()` for more details. + fn push(&mut self, len: uint, buf: &mut Vec) -> IoResult { + let start_len = buf.len(); + buf.reserve_additional(len); + + let n = { + let s = unsafe { slice_vec_capacity(buf, start_len, start_len + len) }; + try!(self.read(s)) + }; + unsafe { buf.set_len(start_len + n) }; + Ok(n) + } + + /// Reads at least `min` bytes, but no more than `len`, and appends them to + /// a vector. + /// Returns the number of bytes read. + /// + /// This will continue to call `read` until at least `min` bytes have been + /// read. If `read` returns 0 too many times, `NoProgress` will be + /// returned. + /// + /// # Error + /// + /// If an error occurs at any point, that error is returned, and no further + /// bytes are read. 
+ fn push_at_least(&mut self, min: uint, len: uint, buf: &mut Vec) -> IoResult { + if min > len { + return Err(IoError { detail: Some("the buffer is too short".to_owned()), + ..standard_error(InvalidInput) }); } let start_len = buf.len(); - let mut s = State { buf: buf, total_read: 0 }; - - s.buf.reserve_additional(len); - unsafe { s.buf.set_len(start_len + len); } - - try_finally( - &mut s, (), - |s, _| { - while s.total_read < len { - let len = s.buf.len(); - let slice = s.buf.mut_slice(start_len + s.total_read, len); - match self.read(slice) { - Ok(nread) => { - s.total_read += nread; - } - Err(e) => return Err(e) - } - } - Ok(()) - }, - |s| unsafe { s.buf.set_len(start_len + s.total_read) }) + buf.reserve_additional(len); + + // we can't just use self.read_at_least(min, slice) because we need to push + // successful reads onto the vector before any returned errors. + + let mut read = 0; + while read < min { + read += { + let s = unsafe { slice_vec_capacity(buf, start_len + read, start_len + len) }; + try!(self.read_at_least(1, s)) + }; + unsafe { buf.set_len(start_len + read) }; + } + Ok(read) } /// Reads exactly `len` bytes and gives you back a new vector of length @@ -544,11 +586,11 @@ pub trait Reader { /// on EOF. Note that if an error is returned, then some number of bytes may /// have already been consumed from the underlying reader, and they are lost /// (not returned as part of the error). If this is unacceptable, then it is - /// recommended to use the `push_exact` or `read` methods. + /// recommended to use the `push_at_least` or `read` methods. 
fn read_exact(&mut self, len: uint) -> IoResult> { let mut buf = Vec::with_capacity(len); - match self.push_exact(&mut buf, len) { - Ok(()) => Ok(buf), + match self.push_at_least(len, len, &mut buf) { + Ok(_) => Ok(buf), Err(e) => Err(e), } } @@ -564,8 +606,8 @@ pub trait Reader { fn read_to_end(&mut self) -> IoResult> { let mut buf = Vec::with_capacity(DEFAULT_BUF_SIZE); loop { - match self.push_exact(&mut buf, DEFAULT_BUF_SIZE) { - Ok(()) => {} + match self.push_at_least(1, DEFAULT_BUF_SIZE, &mut buf) { + Ok(_) => {} Err(ref e) if e.kind == EndOfFile => break, Err(e) => return Err(e) } @@ -828,6 +870,29 @@ impl<'a> Reader for &'a mut Reader { fn read(&mut self, buf: &mut [u8]) -> IoResult { self.read(buf) } } +/// Returns a slice of `v` between `start` and `end`. +/// +/// Similar to `slice()` except this function only bounds the slice on the +/// capacity of `v`, not the length. +/// +/// # Failure +/// +/// Fails when `start` or `end` point outside the capacity of `v`, or when +/// `start` > `end`. +// Private function here because we aren't sure if we want to expose this as +// API yet. If so, it should be a method on Vec. +unsafe fn slice_vec_capacity<'a, T>(v: &'a mut Vec, start: uint, end: uint) -> &'a mut [T] { + use raw::Slice; + use ptr::RawPtr; + + assert!(start <= end); + assert!(end <= v.capacity()); + transmute(Slice { + data: v.as_ptr().offset(start as int), + len: end - start + }) +} + /// A `RefReader` is a struct implementing `Reader` which contains a reference /// to another reader. This is often useful when composing streams. /// @@ -1192,7 +1257,7 @@ pub trait Buffer: Reader { fn fill_buf<'a>(&'a mut self) -> IoResult<&'a [u8]>; /// Tells this buffer that `amt` bytes have been consumed from the buffer, - /// so they should no longer be returned in calls to `fill` or `read`. + /// so they should no longer be returned in calls to `read`. 
fn consume(&mut self, amt: uint); /// Reads the next line of input, interpreted as a sequence of UTF-8 @@ -1439,6 +1504,7 @@ pub fn standard_error(kind: IoErrorKind) -> IoError { ResourceUnavailable => "resource unavailable", TimedOut => "operation timed out", ShortWrite(..) => "short write", + NoProgress => "no progress", }; IoError { kind: kind, @@ -1608,3 +1674,106 @@ on unix-like systems."] static AllPermissions = UserRWX.bits | GroupRWX.bits | OtherRWX.bits } ) + +#[cfg(test)] +mod tests { + use super::{IoResult, Reader, MemReader, NoProgress, InvalidInput}; + use prelude::*; + use uint; + + #[deriving(Clone, Eq, Show)] + enum BadReaderBehavior { + GoodBehavior(uint), + BadBehavior(uint) + } + + struct BadReader { + r: T, + behavior: Vec, + } + + impl BadReader { + fn new(r: T, behavior: Vec) -> BadReader { + BadReader { behavior: behavior, r: r } + } + } + + impl Reader for BadReader { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + let BadReader { ref mut behavior, ref mut r } = *self; + loop { + if behavior.is_empty() { + // fall back on good + return r.read(buf); + } + match behavior.as_mut_slice()[0] { + GoodBehavior(0) => (), + GoodBehavior(ref mut x) => { + *x -= 1; + return r.read(buf); + } + BadBehavior(0) => (), + BadBehavior(ref mut x) => { + *x -= 1; + return Ok(0); + } + }; + behavior.shift(); + } + } + } + + #[test] + fn test_read_at_least() { + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([GoodBehavior(uint::MAX)])); + let mut buf = [0u8, ..5]; + assert!(r.read_at_least(1, buf).unwrap() >= 1); + assert!(r.read_exact(5).unwrap().len() == 5); // read_exact uses read_at_least + assert!(r.read_at_least(0, buf).is_ok()); + + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(50), GoodBehavior(uint::MAX)])); + assert!(r.read_at_least(1, buf).unwrap() >= 1); + + let mut r = 
BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(1), GoodBehavior(1), + BadBehavior(50), GoodBehavior(uint::MAX)])); + assert!(r.read_at_least(1, buf).unwrap() >= 1); + assert!(r.read_at_least(1, buf).unwrap() >= 1); + + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(uint::MAX)])); + assert_eq!(r.read_at_least(1, buf).unwrap_err().kind, NoProgress); + + let mut r = MemReader::new(Vec::from_slice(bytes!("hello, world!"))); + assert_eq!(r.read_at_least(5, buf).unwrap(), 5); + assert_eq!(r.read_at_least(6, buf).unwrap_err().kind, InvalidInput); + } + + #[test] + fn test_push_at_least() { + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([GoodBehavior(uint::MAX)])); + let mut buf = Vec::new(); + assert!(r.push_at_least(1, 5, &mut buf).unwrap() >= 1); + assert!(r.push_at_least(0, 5, &mut buf).is_ok()); + + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(50), GoodBehavior(uint::MAX)])); + assert!(r.push_at_least(1, 5, &mut buf).unwrap() >= 1); + + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(1), GoodBehavior(1), + BadBehavior(50), GoodBehavior(uint::MAX)])); + assert!(r.push_at_least(1, 5, &mut buf).unwrap() >= 1); + assert!(r.push_at_least(1, 5, &mut buf).unwrap() >= 1); + + let mut r = BadReader::new(MemReader::new(Vec::from_slice(bytes!("hello, world!"))), + Vec::from_slice([BadBehavior(uint::MAX)])); + assert_eq!(r.push_at_least(1, 5, &mut buf).unwrap_err().kind, NoProgress); + + let mut r = MemReader::new(Vec::from_slice(bytes!("hello, world!"))); + assert_eq!(r.push_at_least(5, 1, &mut buf).unwrap_err().kind, InvalidInput); + } +}