Skip to content

Commit

Permalink
Introduce StrWriter to allow ESW to wrap both a String and a &mut String
Browse files Browse the repository at this point in the history
  • Loading branch information
marshallpierce committed Sep 25, 2020
1 parent 24ca190 commit 5a56885
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 17 deletions.
15 changes: 15 additions & 0 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,20 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {

b.iter(|| {
let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
stream_enc.write_all(&v).unwrap();
stream_enc.flush().unwrap();
let _ = stream_enc.into_inner();
});
}

fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
let mut v: Vec<u8> = Vec::with_capacity(size);
fill(&mut v);

let mut buf = String::new();
b.iter(|| {
buf.clear();
let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG);
stream_enc.write_all(&v).unwrap();
stream_enc.flush().unwrap();
let _ = stream_enc.into_inner();
Expand Down Expand Up @@ -160,6 +174,7 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
.with_function("encode_slice", do_encode_bench_slice)
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
.with_function("encode_string_stream", do_encode_bench_string_stream)
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
}

fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
Expand Down
6 changes: 6 additions & 0 deletions examples/make_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
}

/// Validates that `alphabet` is usable as a base64 alphabet.
///
/// # Panics
///
/// Panics unless `alphabet` holds exactly 64 bytes that are all distinct and all ASCII.
fn check_alphabet(alphabet: &[u8]) {
    // exactly 64 symbols, none of them repeated
    assert_eq!(64, alphabet.len());
    let distinct: HashSet<u8> = alphabet.iter().cloned().collect();
    assert_eq!(64, distinct.len());

    // every symbol has to be ASCII so that it is valid as a single UTF-8 byte
    alphabet.iter().for_each(|&b| assert!(b <= 0x7F_u8));
}
99 changes: 82 additions & 17 deletions src/write/encoder_string_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use super::encoder::EncoderWriter;
///
/// # Examples
///
/// Buffer base64 in a new String:
///
/// ```
/// use std::io::Write;
///
Expand All @@ -21,27 +23,40 @@ use super::encoder::EncoderWriter;
/// assert_eq!("YXNkZg==", &b64_string);
/// ```
///
/// Or, append to an existing String:
///
/// ```
/// use std::io::Write;
///
/// let mut buf = String::from("base64: ");
///
/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // release the &mut reference on buf
/// let _ = enc.into_inner();
///
/// assert_eq!("base64: YXNkZg==", &buf);
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Performance
///
/// B64-encoded data is buffered in the heap since the point is to collect it in a String.
pub struct EncoderStringWriter {
encoder: EncoderWriter<Vec<u8>>,
/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain
/// bytes to a `io::Write`.
pub struct EncoderStringWriter<S: StrWrite> {
/// The wrapped base64 encoder; its output sink is a `Utf8SingleCodeUnitWriter` around `S`.
encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>,
}

impl EncoderStringWriter {
/// Create a new EncoderStringWriter that will encode with the provided config.
pub fn new(config: Config) -> EncoderStringWriter {
EncoderStringWriter::from(String::new(), config)
}

/// Create a new EncoderStringWriter that will append to the provided string.
pub fn from(s: String, config: Config) -> EncoderStringWriter {
EncoderStringWriter { encoder: EncoderWriter::new(s.into_bytes(), config) }
impl<S: StrWrite> EncoderStringWriter<S> {
/// Create an EncoderStringWriter that will append to the provided `StrWrite`.
pub fn from(str_writer: S, config: Config) -> Self {
    // wrap the string sink so the byte-oriented encoder can write into it
    let sink = Utf8SingleCodeUnitWriter { str_writer };
    EncoderStringWriter {
        encoder: EncoderWriter::new(sink, config),
    }
}

/// Encode all remaining buffered data, including any trailing incomplete input triples and
Expand All @@ -50,15 +65,21 @@ impl EncoderStringWriter {
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// Returns the base64-encoded form of the accumulated written data.
pub fn into_inner(mut self) -> String {
let buf = self.encoder.finish()
.expect("Writing to a Vec<u8> should never fail");
pub fn into_inner(mut self) -> S {
    // `finish()` hands back the sink adapter; the caller's `S` is taken out of it.
    // The adapter's `write` always returns `Ok`, so an error here would be a bug. The panic
    // message names the `StrWrite` sink that is actually being written to.
    self.encoder
        .finish()
        .expect("Writing to a StrWrite should never fail")
        .str_writer
}
}

String::from_utf8(buf).expect("Base64 should always be valid UTF-8")
impl EncoderStringWriter<String> {
/// Create a EncoderStringWriter that will encode into a new String with the provided config.
pub fn new(config: Config) -> Self {
EncoderStringWriter::from(String::new(), config)
}
}

impl<'a> Write for EncoderStringWriter {
impl <S: StrWrite> Write for EncoderStringWriter<S> {
/// Passes `buf` to the wrapped encoder and returns that call's result unchanged.
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.encoder.write(buf)
}
Expand All @@ -68,6 +89,50 @@ impl<'a> Write for EncoderStringWriter {
}
}

/// A sink for `str` data whose writes cannot fail.
///
/// In most cases the implementor in use is simply `String`.
pub trait StrWrite {
    /// Consume all of `buf`. Implementations must not fail and must not write only part of it.
    fn write(&mut self, buf: &str);
}

/// Like `io::Write`, `StrWrite` is available through a mutable reference to any implementor.
impl<S: StrWrite + ?Sized> StrWrite for &mut S {
    fn write(&mut self, buf: &str) {
        // reborrow the referent and delegate to its own implementation
        S::write(&mut **self, buf)
    }
}

impl StrWrite for String {
    fn write(&mut self, buf: &str) {
        // append `buf` to the end of the string
        *self += buf;
    }
}

/// Adapter that lets a byte-oriented `io::Write` producer feed a string sink.
///
/// Every byte slice written through it must be valid UTF-8. It is only used for base64 output,
/// which is always valid UTF-8.
///
/// The `StrWrite` requirement on `S` is stated on the `io::Write` impl rather than on the struct
/// itself, so that the bound is only imposed where it is actually needed.
struct Utf8SingleCodeUnitWriter<S> {
    /// The sink that receives the validated string data.
    str_writer: S,
}

impl<S: StrWrite> io::Write for Utf8SingleCodeUnitWriter<S> {
    /// Forwards `buf` to the string sink as a `str` and reports the whole buffer as written.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is not valid UTF-8.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let written = buf.len();
        let text = std::str::from_utf8(buf).expect("Input must be valid UTF-8");
        self.str_writer.write(text);
        Ok(written)
    }

    /// Nothing is buffered at this layer, so flushing has nothing to do.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

#[cfg(test)]
mod tests {
use crate::encode_config_buf;
Expand Down

0 comments on commit 5a56885

Please sign in to comment.