Skip to content

Commit

Permalink
Merge pull request #142 from marshallpierce/mp/string-writer
Browse files Browse the repository at this point in the history
Add EncoderStringWriter
  • Loading branch information
marshallpierce authored Sep 28, 2020
2 parents 2dc0296 + 8b1ae22 commit 5b40e0c
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 40 deletions.
9 changes: 8 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Next

- Config methods are const
- Added `EncoderStringWriter` to allow encoding directly to a String
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (passing a `&mut W` still works, because `&mut W` itself implements `Write`)
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.

# 0.12.2

Add `BinHex` alphabet
- Add `BinHex` alphabet

# 0.12.1

Expand Down
28 changes: 28 additions & 0 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,32 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) {
});
}

/// Benchmark streaming encoding into a `String`, constructing a brand new
/// `EncoderStringWriter` on every iteration.
///
/// The input is a `Vec<u8>` created with capacity `size` and populated by `fill`.
fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
    let mut input = Vec::<u8>::with_capacity(size);
    fill(&mut input);

    b.iter(|| {
        // a fresh writer per pass, so constructing it is part of what gets timed
        let mut encoder = write::EncoderStringWriter::new(TEST_CONFIG);
        encoder.write_all(&input).unwrap();
        encoder.flush().unwrap();
        // the result is discarded; only the work of producing it matters here
        let _ = encoder.into_inner();
    });
}

/// Benchmark streaming encoding into a single `String` that is cleared and reused across
/// iterations, rather than starting from a new one each time.
///
/// The input is a `Vec<u8>` created with capacity `size` and populated by `fill`.
fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
    let mut input = Vec::<u8>::with_capacity(size);
    fill(&mut input);

    // lives outside the closure so the same String is handed to the writer on every pass
    let mut reused = String::new();
    b.iter(|| {
        // empty the String before handing it to the writer again
        reused.clear();
        let mut encoder = write::EncoderStringWriter::from(&mut reused, TEST_CONFIG);
        encoder.write_all(&input).unwrap();
        encoder.flush().unwrap();
        // the result is discarded; only the work of producing it matters here
        let _ = encoder.into_inner();
    });
}

fn fill(v: &mut Vec<u8>) {
let cap = v.capacity();
// weak randomness is plenty; we just want to not be completely friendly to the branch predictor
Expand All @@ -147,6 +173,8 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
.with_function("encode_reuse_buf", do_encode_bench_reuse_buf)
.with_function("encode_slice", do_encode_bench_slice)
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
.with_function("encode_string_stream", do_encode_bench_string_stream)
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
}

fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
Expand Down
6 changes: 6 additions & 0 deletions examples/make_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
}

/// Sanity-check a candidate base64 alphabet, panicking if it is unusable.
///
/// # Panics
///
/// Panics if `alphabet` does not hold exactly 64 bytes, if any byte occurs more than once,
/// or if any byte is outside the ASCII range.
fn check_alphabet(alphabet: &[u8]) {
    // exactly 64 symbols are required...
    assert_eq!(64, alphabet.len());
    // ...and collapsing them into a set must not lose any, i.e. they are all distinct
    let distinct = alphabet.iter().cloned().collect::<HashSet<u8>>();
    assert_eq!(64, distinct.len());

    // each symbol has to be ASCII so that it is also a valid one-byte UTF-8 character
    alphabet.iter().for_each(|&b| assert!(b <= 0x7F_u8));
}
100 changes: 63 additions & 37 deletions src/write/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
/// let mut enc = base64::write::EncoderWriter::new(
/// &mut wrapped_writer, base64::STANDARD);
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors
/// enc.finish().unwrap();
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// }
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
Expand All @@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
pub struct EncoderWriter<W: Write> {
config: Config,
/// Where encoded data is written to
w: &'a mut W,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
Expand All @@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> {
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// True iff padding / partial last chunk has been written.
finished: bool,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
impl<W: Write> fmt::Debug for EncoderWriter<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
Expand All @@ -89,38 +86,58 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> EncoderWriter<'a, W> {
impl<W: Write> EncoderWriter<W> {
/// Create a new encoder that will write to the provided delegate writer `w`.
pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
EncoderWriter {
config,
w,
delegate: Some(w),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
finished: false,
panicked: false,
}
}

/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes can be performed, as that would produce invalid
/// base64.
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
/// to its `write` each invocation.
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
pub fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
pub fn finish(&mut self) -> Result<W> {
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called")
};

self.write_final_leftovers()?;

let writer = self.delegate.take().expect("Writer must be present");

Ok(writer)
}

/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}

self.write_all_encoded_output()?;

if self.extra_input_occupied_len > 0 {
Expand All @@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
self.extra_input_occupied_len = 0;
}

self.finished = true;
Ok(())
}

Expand All @@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self.w.write(&self.output[..current_output_len]);
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;

res.map(|consumed| {
Expand Down Expand Up @@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
impl<W: Write> Write for EncoderWriter<W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
Expand All @@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.finished {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}

Expand Down Expand Up @@ -339,17 +359,23 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {

/// Because this is usually treated as OK to call multiple times, it will *not* flush any
/// incomplete chunks of input or write padding.
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.w.flush()
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
impl<W: Write> Drop for EncoderWriter<W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.finish();
let _ = self.write_final_leftovers();
}
}
}
Loading

0 comments on commit 5b40e0c

Please sign in to comment.