Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EncoderStringWriter #142

Merged
merged 5 commits into from
Sep 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Next

- Config methods are const
- Added `EncoderStringWriter` to allow encoding directly to a String
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.

# 0.12.2

Add `BinHex` alphabet
- Add `BinHex` alphabet

# 0.12.1

Expand Down
28 changes: 28 additions & 0 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,32 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) {
});
}

/// Benchmark streaming encoding through a brand-new `EncoderStringWriter` on every iteration.
///
/// The input buffer is allocated with capacity `size` and populated by `fill` once, outside the
/// timed closure. Each iteration constructs a writer, writes the whole input, flushes, and then
/// discards whatever `into_inner()` hands back.
fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
    let mut input: Vec<u8> = Vec::with_capacity(size);
    fill(&mut input);

    b.iter(|| {
        let mut encoder = write::EncoderStringWriter::new(TEST_CONFIG);
        encoder.write_all(&input).unwrap();
        encoder.flush().unwrap();
        // only the work of producing the output matters here, not the output itself
        let _ = encoder.into_inner();
    });
}

/// Benchmark streaming encoding through an `EncoderStringWriter` built around a reused `String`.
///
/// The input buffer is allocated with capacity `size` and populated by `fill` once, outside the
/// timed closure. A single `String` is shared by all iterations: it is cleared at the start of
/// each one and then handed to `EncoderStringWriter::from` as a mutable reference.
fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
    let mut input: Vec<u8> = Vec::with_capacity(size);
    fill(&mut input);

    // lives outside the closure so every iteration works on the same String
    let mut reused = String::new();
    b.iter(|| {
        // drop the previous iteration's contents; `clear` leaves the capacity in place
        reused.clear();
        let mut encoder = write::EncoderStringWriter::from(&mut reused, TEST_CONFIG);
        encoder.write_all(&input).unwrap();
        encoder.flush().unwrap();
        let _ = encoder.into_inner();
    });
}

fn fill(v: &mut Vec<u8>) {
let cap = v.capacity();
// weak randomness is plenty; we just want to not be completely friendly to the branch predictor
Expand All @@ -147,6 +173,8 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
.with_function("encode_reuse_buf", do_encode_bench_reuse_buf)
.with_function("encode_slice", do_encode_bench_slice)
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
.with_function("encode_string_stream", do_encode_bench_string_stream)
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
}

fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
Expand Down
6 changes: 6 additions & 0 deletions examples/make_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
}

/// Sanity-check a candidate alphabet before tables are generated from it.
///
/// # Panics
///
/// Panics if `alphabet` does not contain exactly 64 bytes, if any byte occurs more than once,
/// or if any byte is outside the ASCII range (greater than `0x7F`).
fn check_alphabet(alphabet: &[u8]) {
    // exactly 64 symbols, and no symbol repeated
    assert_eq!(64, alphabet.len());
    let distinct: HashSet<u8> = alphabet.iter().cloned().collect();
    assert_eq!(64, distinct.len());

    // each symbol has to be ASCII so that it is also valid as a single UTF-8 byte
    alphabet.iter().for_each(|&b| assert!(b <= 0x7F_u8));
}
100 changes: 63 additions & 37 deletions src/write/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
/// let mut enc = base64::write::EncoderWriter::new(
/// &mut wrapped_writer, base64::STANDARD);
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors
/// enc.finish().unwrap();
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// }
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
Expand All @@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
pub struct EncoderWriter<W: Write> {
config: Config,
/// Where encoded data is written to
w: &'a mut W,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
Expand All @@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> {
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// True iff padding / partial last chunk has been written.
finished: bool,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
impl<W: Write> fmt::Debug for EncoderWriter<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
Expand All @@ -89,38 +86,58 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> EncoderWriter<'a, W> {
impl<W: Write> EncoderWriter<W> {
/// Create a new encoder that will write to the provided delegate writer `w`.
pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
EncoderWriter {
config,
w,
delegate: Some(w),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
finished: false,
panicked: false,
}
}

/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes can be performed, as that would produce invalid
/// base64.
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
/// to its `write` each invocation.
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
pub fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
pub fn finish(&mut self) -> Result<W> {
marshallpierce marked this conversation as resolved.
Show resolved Hide resolved
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called")
};

self.write_final_leftovers()?;

let writer = self.delegate.take().expect("Writer must be present");

Ok(writer)
}

/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}

self.write_all_encoded_output()?;

if self.extra_input_occupied_len > 0 {
Expand All @@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
self.extra_input_occupied_len = 0;
}

self.finished = true;
Ok(())
}

Expand All @@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self.w.write(&self.output[..current_output_len]);
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;

res.map(|consumed| {
Expand Down Expand Up @@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
impl<W: Write> Write for EncoderWriter<W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
Expand All @@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.finished {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}

Expand Down Expand Up @@ -339,17 +359,23 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {

/// Because this is usually treated as OK to call multiple times, it will *not* flush any
/// incomplete chunks of input or write padding.
///
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.w.flush()
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
impl<W: Write> Drop for EncoderWriter<W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.finish();
let _ = self.write_final_leftovers();
}
}
}
Loading