Skip to content

Commit

Permalink
Document that read::GzDecoder consumes bytes after end of gzip
Browse files Browse the repository at this point in the history
Add tests showing that the `GzDecoder`s in `bufread` and `write`
support reading immediately after end of gzip data.

Co-authored-by: Sebastian Thiel <sebastian.thiel@icloud.com>
  • Loading branch information
jongiddy and Byron committed Aug 1, 2023
1 parent 956397a commit b2079e3
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 2 deletions.
47 changes: 47 additions & 0 deletions src/gz/bufread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,3 +432,50 @@ impl<R: BufRead> Read for MultiGzDecoder<R> {
self.0.read(into)
}
}

#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    /// `GzDecoder` consumes one gzip member and then returns 0 for subsequent reads,
    /// allowing any additional data to be consumed by the caller through the
    /// underlying reader.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        // A single gzip member followed by one trailing byte that is not gzip data.
        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` instead of `write`: a short write would otherwise go
            // unnoticed and silently truncate the test input.
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Probe with a non-empty buffer: reading into a zero-length slice returns 0
        // no matter what state the decoder is in, which would make this check vacuous.
        let mut probe = [0u8; 8];
        assert_eq!(
            decoder.read(&mut probe).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
14 changes: 12 additions & 2 deletions src/gz/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl<R: Read + Write> Write for GzEncoder<R> {
}
}

/// A decoder for a single member of a [gzip file].
///
/// This structure exposes a [`Read`] interface that will consume compressed
/// data from the underlying reader and emit uncompressed data.
Expand Down Expand Up @@ -155,19 +155,29 @@ impl<R> GzDecoder<R> {
}

/// Acquires a reference to the underlying reader.
///
/// Note that the decoder may have read past the end of the gzip data, so the
/// reader is not guaranteed to be positioned immediately after the gzip
/// member. To prevent this, use [`bufread::GzDecoder`] instead.
pub fn get_ref(&self) -> &R {
// `inner` wraps `R` in two layers, hence the two chained `get_ref` calls.
self.inner.get_ref().get_ref()
}

/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
/// this decoder continues to be used.
///
/// Note that the decoder may have read past the end of the gzip data, so the
/// stream is not guaranteed to be positioned immediately after the gzip
/// member. To prevent this, use [`bufread::GzDecoder`] instead.
pub fn get_mut(&mut self) -> &mut R {
// `inner` wraps `R` in two layers, hence the two chained `get_mut` calls.
self.inner.get_mut().get_mut()
}

/// Consumes this decoder, returning the underlying reader.
///
/// Note that the decoder may have read past the end of the gzip data.
/// Subsequent reads from the returned reader will skip those bytes. To
/// prevent this, use [`bufread::GzDecoder`] instead.
pub fn into_inner(self) -> R {
// `inner` wraps `R` in two layers, hence the two chained `into_inner` calls.
self.inner.into_inner().into_inner()
}
Expand Down
28 changes: 28 additions & 0 deletions src/gz/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,4 +610,32 @@ mod tests {
let expected = STR.repeat(2);
assert_eq!(return_string, expected);
}

// GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
// additional data to be consumed by the caller.
#[test]
fn decode_extra_data() {
    // A single gzip member followed by one trailing byte that is not gzip data.
    let compressed = {
        let mut e = GzEncoder::new(Vec::new(), Compression::default());
        // `write_all` instead of `write`: a short write would otherwise go
        // unnoticed and silently truncate the test input.
        e.write_all(STR.as_ref()).unwrap();
        let mut b = e.finish().unwrap();
        b.push(b'x');
        b
    };

    let mut decoder = GzDecoder::new(Vec::new());
    // Feed the decoder until it stops accepting input; a return value of 0 marks
    // the point at which the gzip member has been fully consumed.
    let mut consumed_bytes = 0;
    loop {
        let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
        if n == 0 {
            break;
        }
        consumed_bytes += n;
    }
    let decoded = decoder.finish().unwrap();
    let actual = String::from_utf8(decoded).expect("String parsing error");
    assert_eq!(actual, STR);
    // Everything the decoder refused to accept is the trailing, non-gzip data.
    assert_eq!(&compressed[consumed_bytes..], b"x");
}
}
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,14 @@ mod zlib;
/// Types which operate over [`Read`] streams, both encoders and decoders for
/// various formats.
///
/// Note that the `read` decoder types may read past the end of the compressed
/// data while decoding. If the caller requires subsequent reads to start
/// immediately following the compressed data, wrap the `Read` type in a
/// [`BufReader`], pass that `BufReader` to the equivalent decoder from the
/// `bufread` module, and use the same `BufReader` for the subsequent reads.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
pub mod read {
pub use crate::deflate::read::DeflateDecoder;
pub use crate::deflate::read::DeflateEncoder;
Expand Down

0 comments on commit b2079e3

Please sign in to comment.