From 5a56885c655deb54ccb206165e8351b0476c78b8 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Fri, 25 Sep 2020 17:53:00 -0600 Subject: [PATCH] Introduce StrWriter to allow ESW to wrap both a String and a &mut String --- benches/benchmarks.rs | 15 +++++ examples/make_tables.rs | 6 ++ src/write/encoder_string_writer.rs | 99 +++++++++++++++++++++++++----- 3 files changed, 103 insertions(+), 17 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index e98cb938..3d27bbb7 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -129,6 +129,20 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { b.iter(|| { let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); + stream_enc.write_all(&v).unwrap(); + stream_enc.flush().unwrap(); + let _ = stream_enc.into_inner(); + }); +} + +fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) { + let mut v: Vec = Vec::with_capacity(size); + fill(&mut v); + + let mut buf = String::new(); + b.iter(|| { + buf.clear(); + let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); let _ = stream_enc.into_inner(); @@ -160,6 +174,7 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { .with_function("encode_slice", do_encode_bench_slice) .with_function("encode_reuse_buf_stream", do_encode_bench_stream) .with_function("encode_string_stream", do_encode_bench_string_stream) + .with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream) } fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { diff --git a/examples/make_tables.rs b/examples/make_tables.rs index 5ef3075f..db6fcf2b 100644 --- a/examples/make_tables.rs +++ b/examples/make_tables.rs @@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) { } fn check_alphabet(alphabet: &[u8]) { + // ensure all characters are distinct assert_eq!(64, alphabet.len()); let mut set: HashSet = HashSet::new(); set.extend(alphabet); assert_eq!(64, set.len()); + + // must be ASCII to be valid as single UTF-8 bytes + for &b in alphabet { + assert!(b <= 0x7F_u8); + } } diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index fad3499b..2b19ccf5 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -8,6 +8,8 @@ use super::encoder::EncoderWriter; /// /// # Examples /// +/// Buffer base64 in a new String: +/// /// ``` /// use std::io::Write; /// @@ -21,6 +23,23 @@ use super::encoder::EncoderWriter; /// assert_eq!("YXNkZg==", &b64_string); /// ``` /// +/// Or, append to an existing String: +/// +/// ``` +/// use std::io::Write; +/// +/// let mut buf = String::from("base64: "); +/// +/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD); +/// +/// enc.write_all(b"asdf").unwrap(); +/// +/// // release the &mut reference on buf +/// let _ = enc.into_inner(); +/// +/// assert_eq!("base64: YXNkZg==", &buf); +/// ``` +/// /// # Panics /// /// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without @@ -28,20 +47,16 @@ use super::encoder::EncoderWriter; /// /// # Performance /// -/// B64-encoded data is buffered in the heap since the point is to collect it in a String. -pub struct EncoderStringWriter { - encoder: EncoderWriter>, +/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain +/// bytes to a `io::Write`. +pub struct EncoderStringWriter { + encoder: EncoderWriter>, } -impl EncoderStringWriter { - /// Create a new EncoderStringWriter that will encode with the provided config. - pub fn new(config: Config) -> EncoderStringWriter { - EncoderStringWriter::from(String::new(), config) - } - - /// Create a new EncoderStringWriter that will append to the provided string. - pub fn from(s: String, config: Config) -> EncoderStringWriter { - EncoderStringWriter { encoder: EncoderWriter::new(s.into_bytes(), config) } +impl EncoderStringWriter { + /// Create a EncoderStringWriter that will append to the provided `StrWrite`. + pub fn from(str_writer: S, config: Config) -> Self { + EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_writer }, config) } } /// Encode all remaining buffered data, including any trailing incomplete input triples and @@ -50,15 +65,21 @@ impl EncoderStringWriter { /// Once this succeeds, no further writes or calls to this method are allowed. /// /// Returns the base64-encoded form of the accumulated written data. - pub fn into_inner(mut self) -> String { - let buf = self.encoder.finish() - .expect("Writing to a Vec should never fail"); + pub fn into_inner(mut self) -> S { + self.encoder.finish() + .expect("Writing to a Vec should never fail") + .str_writer + } +} - String::from_utf8(buf).expect("Base64 should always be valid UTF-8") +impl EncoderStringWriter { + /// Create a EncoderStringWriter that will encode into a new String with the provided config. + pub fn new(config: Config) -> Self { + EncoderStringWriter::from(String::new(), config) } } -impl<'a> Write for EncoderStringWriter { +impl Write for EncoderStringWriter { fn write(&mut self, buf: &[u8]) -> io::Result { self.encoder.write(buf) } @@ -68,6 +89,50 @@ impl<'a> Write for EncoderStringWriter { } } +/// An abstraction around infallible writes of `str`s. +/// +/// Typically, this will just be String. +pub trait StrWrite { + /// The write must succeed, and must write the entire `buf`. + fn write(&mut self, buf: &str); +} + +/// As for io::Write, StrWrite is implemented automatically for `&mut S`. +impl StrWrite for &mut S { + fn write(&mut self, buf: &str) { + (**self).write(buf) + } +} + +impl StrWrite for String { + fn write(&mut self, buf: &str) { + self.push_str(buf) + } +} + +/// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units. +/// +/// This is safe because we only use it when writing base64, which is always valid UTF-8. +struct Utf8SingleCodeUnitWriter { + str_writer: S +} + +impl io::Write for Utf8SingleCodeUnitWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let s = std::str::from_utf8(buf) + .expect("Input must be valid UTF-8"); + + self.str_writer.write(s); + + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + // no op + Ok(()) + } +} + #[cfg(test)] mod tests { use crate::encode_config_buf;