Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EncoderStringWriter #142

Merged
merged 5 commits into from
Sep 28, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Next

- Config methods are const
- Added `EncoderStringWriter` to allow encoding directly to a String
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`

# 0.12.2

Add `BinHex` alphabet
- Add `BinHex` alphabet

# 0.12.1

97 changes: 60 additions & 37 deletions src/write/encoder.rs
Original file line number Diff line number Diff line change
@@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
/// let mut enc = base64::write::EncoderWriter::new(
/// &mut wrapped_writer, base64::STANDARD);
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors
/// enc.finish().unwrap();
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// }
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
@@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
pub struct EncoderWriter<W: Write> {
config: Config,
/// Where encoded data is written to
w: &'a mut W,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
@@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> {
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// True iff padding / partial last chunk has been written.
finished: bool,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
impl<W: Write> fmt::Debug for EncoderWriter<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
@@ -89,38 +86,58 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> EncoderWriter<'a, W> {
impl<W: Write> EncoderWriter<W> {
/// Create a new encoder that will write to the provided delegate writer `w`.
pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
EncoderWriter {
config,
w,
delegate: Some(w),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
finished: false,
panicked: false,
}
}

/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes can be performed, as that would produce invalid
/// base64.
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
/// to its `write` each invocation.
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
pub fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
pub fn finish(&mut self) -> Result<W> {
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called")
};

self.write_final_leftovers()?;

let writer = self.delegate.take().expect("Writer must be present");

Ok(writer)
}

/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}

self.write_all_encoded_output()?;

if self.extra_input_occupied_len > 0 {
@@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
self.extra_input_occupied_len = 0;
}

self.finished = true;
Ok(())
}

@@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self.w.write(&self.output[..current_output_len]);
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;

res.map(|consumed| {
@@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
impl<W: Write> Write for EncoderWriter<W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
@@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.finished {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}

@@ -341,15 +361,18 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
/// incomplete chunks of input or write padding.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.w.flush()
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
impl<W: Write> Drop for EncoderWriter<W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.finish();
let _ = self.write_final_leftovers();
}
}
}
107 changes: 107 additions & 0 deletions src/write/encoder_string_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use crate::Config;
use std::io;
use std::io::Write;
use super::encoder::EncoderWriter;

/// A base64 encoder that implements `Write` and collects its encoded output in memory.
///
/// Bytes written to it are encoded using the config supplied at construction time. The
/// accumulated base64 text is handed back as a `String` by `finish()`.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// let mut enc = base64::write::EncoderStringWriter::new(base64::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // get the resulting String
/// let b64_string = enc.finish().unwrap();
///
/// assert_eq!("YXNkZg==", &b64_string);
/// ```
///
/// # Panics
///
/// Once `finish()` has returned successfully, any further call to `write()` (or related
/// methods) or to `finish()` is invalid and will panic.
///
/// # Performance
///
/// The encoded data is buffered on the heap, since the whole purpose of this type is to
/// gather it into a `String`.
pub struct EncoderStringWriter {
/// Streaming encoder whose delegate writer is the in-memory byte buffer that receives the
/// base64 output.
encoder: EncoderWriter<Vec<u8>>,
}

impl EncoderStringWriter {
    /// Build an `EncoderStringWriter` that encodes with `config` into a fresh, empty buffer.
    pub fn new(config: Config) -> EncoderStringWriter {
        let encoder = EncoderWriter::new(Vec::new(), config);
        EncoderStringWriter { encoder }
    }

    /// Finalize the encoding: any buffered data, including a trailing incomplete input triple
    /// and its padding, is encoded, and the accumulated base64 is returned as a `String`.
    ///
    /// After this succeeds, neither further writes nor another call to this method are
    /// allowed.
    ///
    /// # Errors
    ///
    /// The first error that is not of `ErrorKind::Interrupted` will be returned.
    pub fn finish(&mut self) -> io::Result<String> {
        self.encoder
            .finish()
            // The wrapped encoder only ever emits base64 output, so the conversion
            // failing would indicate a bug rather than a recoverable condition.
            .map(|bytes| String::from_utf8(bytes).expect("Base64 should always be valid UTF-8"))
    }
}

impl Write for EncoderStringWriter {
    /// Forward `buf` to the wrapped encoder, which base64-encodes it into the in-memory buffer.
    ///
    /// Returns whatever the wrapped encoder's `write` returns.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.encoder.write(buf)
    }

    /// Forward the flush request to the wrapped encoder.
    fn flush(&mut self) -> io::Result<()> {
        self.encoder.flush()
    }
}

#[cfg(test)]
mod tests {
    use crate::encode_config_buf;
    use crate::tests::random_config;
    use crate::write::encoder_string_writer::EncoderStringWriter;
    use rand::Rng;
    use std::io::Write;

    /// For each split point from 0 up to (but not including) the input length, feed random
    /// input to the streaming encoder in two pieces and check that the result matches the
    /// one-shot encoding of the same bytes under the same random config.
    #[test]
    fn every_possible_split_of_input() {
        let mut rng = rand::thread_rng();
        let mut input = Vec::<u8>::new();
        let mut expected = String::new();

        let input_len = 5_000;

        for split_point in 0..input_len {
            // Reuse the allocations across iterations, but start from fresh content.
            input.clear();
            expected.clear();

            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            let config = random_config(&mut rng);
            encode_config_buf(&input, config, &mut expected);

            // Write the first `split_point` bytes, then the rest
            let (head, tail) = input.split_at(split_point);
            let mut writer = EncoderStringWriter::new(config);
            writer.write_all(head).unwrap();
            writer.write_all(tail).unwrap();

            let actual = writer.finish().unwrap();

            assert_eq!(expected, actual);
        }
    }
}
4 changes: 2 additions & 2 deletions src/write/encoder_tests.rs
Original file line number Diff line number Diff line change
@@ -436,7 +436,7 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_
}
}

stream_encoder.finish().unwrap();
let _ = stream_encoder.finish().unwrap();

assert_eq!(orig_len, bytes_consumed);
}
@@ -500,7 +500,7 @@ fn do_encode_random_config_matches_normal_encode(max_input_len: usize) {
bytes_consumed += input_len;
}

stream_encoder.finish().unwrap();
let _ = stream_encoder.finish().unwrap();

assert_eq!(orig_len, bytes_consumed);
}
2 changes: 2 additions & 0 deletions src/write/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//! Implementations of `io::Write` to transparently handle base64.
mod encoder;
mod encoder_string_writer;
pub use self::encoder::EncoderWriter;
pub use self::encoder_string_writer::EncoderStringWriter;

#[cfg(test)]
mod encoder_tests;