diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 5b5061a3..e65c2eb6 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -167,12 +167,21 @@ impl Write for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data +/// This structure exposes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. -/// Use [`MultiGzDecoder`] if your file has multiple streams. /// +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// If you need the following bytes, call `into_inner()` after Ok(0) to +/// recover the underlying reader. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -397,20 +406,19 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure exposes a [`BufRead`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while [`GzDecoder`] will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. It's also useful -/// to compress large amounts of data in parallel where each thread produces one stream -/// for a chunk of input data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the +/// underlying reader does. For a file, this reads to the end of the file. /// -/// This structure exposes a [`BufRead`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples diff --git a/src/gz/read.rs b/src/gz/read.rs index 2a16a6ac..2f923731 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -90,13 +90,22 @@ impl Write for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for the first member of a [gzip file]. /// /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. -/// Use [`MultiGzDecoder`] if your file has multiple streams. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// `GzDecoder` may have read additional bytes past the end of the gzip data. +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// and use `bufread::GzDecoder` instead. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// @@ -180,21 +189,19 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while [`GzDecoder`] will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. It's also useful -/// to compress large amounts of data in parallel where each thread produces one stream -/// for a chunk of input data. +/// This structure exposes a [`Read`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// diff --git a/src/gz/write.rs b/src/gz/write.rs index dd8a6bd0..feda221e 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -166,12 +166,20 @@ impl Drop for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Write`] interface that will emit uncompressed data -/// to the underlying writer `W`. -/// Use [`MultiGzDecoder`] if your file has multiple streams. +/// This structure exposes a [`Write`] interface, receiving compressed data and +/// writing uncompressed data to the underlying writer. +/// +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// handle any data following the gzip member. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -373,17 +381,19 @@ impl Read for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// +/// This structure exposes a [`Write`] interface that will consume compressed data and +/// write uncompressed data to the underlying writer. +/// +/// A gzip file consists of a series of *members* concatenated one after another. +/// `MultiGzDecoder` decodes all members of a file and writes them to the +/// underlying writer one after another. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// -/// This structure exposes a [`Write`] interface that will consume all gzip members -/// from the written buffers and write uncompressed data to the writer. +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 #[derive(Debug)] pub struct MultiGzDecoder { inner: GzDecoder, diff --git a/src/lib.rs b/src/lib.rs index 738875c5..127e2354 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,12 +65,30 @@ //! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly //! to the underlying object if available. //! +//! # About multi-member Gzip files +//! +//! While most `gzip` files one encounters will have a single *member* that can be read +//! with the [`GzDecoder`], there may be some files which have multiple members. +//! +//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly +//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate +//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder` +//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled. +//! +//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file +//! into one consecutive stream of bytes, which hides the underlying *members* entirely. +//! If a file contains contains non-gzip data after the gzip data, MultiGzDecoder will +//! emit an error after decoding the gzip data. This behavior matches the `gzip`, +//! `gunzip`, and `zcat` command line tools. +//! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html //! [`write`]: write/index.html //! [read]: https://doc.rust-lang.org/std/io/trait.Read.html //! [write]: https://doc.rust-lang.org/std/io/trait.Write.html //! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html +//! [`GzDecoder`]: read/struct.GzDecoder.html +//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html #![doc(html_root_url = "https://docs.rs/flate2/0.2")] #![deny(missing_docs)] #![deny(missing_debug_implementations)]