From 7d5856d0bb724eb77a558c89a5bae878e1d8dc3c Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Sun, 30 Oct 2022 20:14:10 -0700 Subject: [PATCH 1/8] Recommend MultiGzDecoder over GzDecoder in docs --- src/gz/bufread.rs | 18 +++++++----------- src/gz/read.rs | 21 +++++++-------------- src/gz/write.rs | 17 ++++++++--------- 3 files changed, 22 insertions(+), 34 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 5b5061a3..953cc569 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -167,11 +167,11 @@ impl Write for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for +/// most uses. /// /// This structure consumes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. -/// Use [`MultiGzDecoder`] if your file has multiple streams. /// /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// @@ -397,20 +397,16 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a full [gzip file]. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while [`GzDecoder`] will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. It's also useful -/// to compress large amounts of data in parallel where each thread produces one stream -/// for a chunk of input data. +/// A gzip file consists of a series of "members" concatenated one after another. +/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode +/// the first one member. 
MultiGzDecoder is preferable in most cases. /// /// This structure exposes a [`BufRead`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples diff --git a/src/gz/read.rs b/src/gz/read.rs index 2a16a6ac..4d10c4a8 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -90,13 +90,11 @@ impl Write for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for +/// most uses. /// /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. -/// Use [`MultiGzDecoder`] if your file has multiple streams. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// /// # Examples /// @@ -180,21 +178,16 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a full [gzip file]. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while [`GzDecoder`] will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. It's also useful -/// to compress large amounts of data in parallel where each thread produces one stream -/// for a chunk of input data. +/// A gzip file consists of a series of "members" concatenated one after another. +/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode +/// the first one member. MultiGzDecoder is preferable in most cases. 
/// /// This structure exposes a [`Read`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// diff --git a/src/gz/write.rs b/src/gz/write.rs index dd8a6bd0..8f84ee73 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -166,11 +166,11 @@ impl Drop for GzEncoder { } } -/// A gzip streaming decoder +/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for +/// most uses. /// /// This structure exposes a [`Write`] interface that will emit uncompressed data /// to the underlying writer `W`. -/// Use [`MultiGzDecoder`] if your file has multiple streams. /// /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// @@ -373,17 +373,16 @@ impl Read for GzDecoder { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a full [gzip file]. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// A gzip file consists of a series of "members" concatenated one after another. +/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode +/// the first one member. MultiGzDecoder is preferable in most cases. /// /// This structure exposes a [`Write`] interface that will consume all gzip members /// from the written buffers and write uncompressed data to the writer. 
+/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 #[derive(Debug)] pub struct MultiGzDecoder { inner: GzDecoder, From 7cfdd4e93cfc42ec7c7ea6303087033746d26fde Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 17 Jul 2023 09:05:32 +0200 Subject: [PATCH 2/8] minor improvements to the MultiGzDecoder documentation --- src/gz/bufread.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 953cc569..25bd74ac 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -397,11 +397,11 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes a full [gzip file]. +/// A gzip streaming decoder that decodes a complete [gzip file]. /// /// A gzip file consists of a series of "members" concatenated one after another. /// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first one member. MultiGzDecoder is preferable in most cases. +/// the first member. MultiGzDecoder is preferable in most cases. /// /// This structure exposes a [`BufRead`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. From a2325748912d02e3e1d80d6529aa786297ab768e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 20 Jul 2023 08:31:20 +0200 Subject: [PATCH 3/8] applies copies of minor improvements --- src/gz/read.rs | 2 +- src/gz/write.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gz/read.rs b/src/gz/read.rs index 4d10c4a8..fbd34ccd 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -182,7 +182,7 @@ impl Write for GzDecoder { /// /// A gzip file consists of a series of "members" concatenated one after another. /// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first one member. MultiGzDecoder is preferable in most cases. +/// the first member. MultiGzDecoder is preferable in most cases. 
/// /// This structure exposes a [`Read`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. diff --git a/src/gz/write.rs b/src/gz/write.rs index 8f84ee73..b2b7be5e 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -377,7 +377,7 @@ impl Read for GzDecoder { /// /// A gzip file consists of a series of "members" concatenated one after another. /// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first one member. MultiGzDecoder is preferable in most cases. +/// the first member. MultiGzDecoder is preferable in most cases. /// /// This structure exposes a [`Write`] interface that will consume all gzip members /// from the written buffers and write uncompressed data to the writer. From e21986e28c728ceec2c53c16ca5dbbd8a5ccfd5b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 23 Jul 2023 15:21:21 +0200 Subject: [PATCH 4/8] Add top-level comparison between `GzDecoder` and `MultiGzDecoder` --- src/lib.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 738875c5..6cbb0063 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,12 +65,30 @@ //! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly //! to the underlying object if available. //! +//! # About multi-member Gzip files +//! +//! While most `gzip` files one encounters will have a single *member* that can be read +//! with the [`GzDecoder`], there may be some files which have multiple members. +//! +//! If these are read with a [`GzDecoder`], only the first member will be consumed and +//! the rest will silently be left alone, which can be surprising. +//! +//! The [`MultiGzDecoder`] on the other hand will decode all *members* of `gzip` file +//! into one consecutive stream of bytes, which hides the underlying *members* entirely +//! while failing if the file does not contain solely `gzip` *members*. +//! +//! 
It's worth noting that major browser like Chrome, Firefox as well as tool like `curl` +//! will only decode the first member of a `gzip` encoded reply, so what's right to do +//! truly depends on the context, as well the expected input of the library or application. +//! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html //! [`write`]: write/index.html //! [read]: https://doc.rust-lang.org/std/io/trait.Read.html //! [write]: https://doc.rust-lang.org/std/io/trait.Write.html //! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html +//! [`GzDecoder`]: read/struct.GzDecoder.html +//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html #![doc(html_root_url = "https://docs.rs/flate2/0.2")] #![deny(missing_docs)] #![deny(missing_debug_implementations)] From 1e095719b361f0a3e857fa6d539cef7cfad4166f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 23 Jul 2023 15:38:55 +0200 Subject: [PATCH 5/8] Apply suggestions to impartial to Gz and MultiGz implementations. I also added a reference to the general section about the differences in the crate documentation. Co-Authored-By: Josh Triplett --- src/gz/bufread.rs | 8 ++++---- src/gz/read.rs | 11 +++++++---- src/gz/write.rs | 13 ++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 25bd74ac..6f7b514b 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -167,8 +167,7 @@ impl Write for GzEncoder { } } -/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for -/// most uses. +/// A decoder for a gzip file with a single member. /// /// This structure consumes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. @@ -397,11 +396,12 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes a complete [gzip file]. +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. 
/// /// A gzip file consists of a series of "members" concatenated one after another. /// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first member. MultiGzDecoder is preferable in most cases. +/// the first member. Learn more +/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). /// /// This structure exposes a [`BufRead`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. diff --git a/src/gz/read.rs b/src/gz/read.rs index fbd34ccd..aa36ad2c 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -90,12 +90,14 @@ impl Write for GzEncoder { } } -/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for -/// most uses. +/// A decoder for a gzip file with a single member. /// /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. /// +/// This decoder only handles gzipped data with a single stream. +/// Use [`MultiGzDecoder`] for gzipped data with multiple streams. +/// /// # Examples /// /// ``` @@ -178,11 +180,12 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes a full [gzip file]. +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. /// /// A gzip file consists of a series of "members" concatenated one after another. /// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first member. MultiGzDecoder is preferable in most cases. +/// the first member. Learn more +/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). /// /// This structure exposes a [`Read`] interface that will consume all gzip members /// from the underlying reader and emit uncompressed data. 
diff --git a/src/gz/write.rs b/src/gz/write.rs index b2b7be5e..d1d309dc 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -166,12 +166,14 @@ impl Drop for GzEncoder { } } -/// A decoder for a single member of a gzip file. Prefer [MultiGzDecoder] for -/// most uses. +/// A decoder for a gzip file with a single member. /// /// This structure exposes a [`Write`] interface that will emit uncompressed data /// to the underlying writer `W`. /// +/// This decoder only handles gzipped data with a single stream. +/// Use [`MultiGzDecoder`] for gzipped data with multiple streams. +/// /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -373,11 +375,12 @@ impl Read for GzDecoder { } } -/// A gzip streaming decoder that decodes a full [gzip file]. +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. /// /// A gzip file consists of a series of "members" concatenated one after another. -/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first member. MultiGzDecoder is preferable in most cases. +/// `MultiGzDecoder` decodes all members of a file, while [GzDecoder] will only decode +/// the first member. Learn more +/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). /// /// This structure exposes a [`Write`] interface that will consume all gzip members /// from the written buffers and write uncompressed data to the writer. From 955728bb94b43dc8763c667e7c5d5c09edf3b7c8 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Mon, 24 Jul 2023 18:15:47 -0700 Subject: [PATCH 6/8] Tweak the {Gz,MultiGz}Decoder docs more - Use relative paths to link to the introduction. - Use consistent language across {Read,BufRead,Write}{Gz,MultiGz}Decoder. - Use `member` rather than `stream`. - Document what happens to unused data for `Gz` variants. 
--- src/gz/bufread.rs | 31 ++++++++++++++++++++++--------- src/gz/read.rs | 31 +++++++++++++++++++++---------- src/gz/write.rs | 29 ++++++++++++++++++----------- src/lib.rs | 17 ++++++++++------- 4 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 6f7b514b..3a0cda8f 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -167,11 +167,22 @@ impl Write for GzEncoder { } } -/// A decoder for a gzip file with a single member. +/// A decoder for the first member of a [gzip file]. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data +/// This structure exposes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. /// +/// After reading the first member of a gzip file (which is often, but not +/// always, the only member), this reader will return Ok(0) even if there +/// are more bytes available in the underlying reader. If you want to be sure +/// not to drop bytes on the floor, call `into_inner()` after Ok(0) to +/// recover the underlying reader. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -396,15 +407,17 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure exposes a [`BufRead`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// A gzip file consists of a series of "members" concatenated one after another. 
-/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first member. Learn more -/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// This structure exposes a [`BufRead`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html diff --git a/src/gz/read.rs b/src/gz/read.rs index aa36ad2c..adc9cda6 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -90,13 +90,22 @@ impl Write for GzEncoder { } } -/// A decoder for a gzip file with a single member. +/// A decoder for the first member of a [gzip file]. /// /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. /// -/// This decoder only handles gzipped data with a single stream. -/// Use [`MultiGzDecoder`] for gzipped data with multiple streams. +/// After reading the first member of a gzip file (which is often, but not +/// always, the only member), this reader will return Ok(0) even if there +/// are more bytes available in the underlying reader. If you want to be sure +/// not to drop bytes on the floor, call `into_inner()` after Ok(0) to +/// recover the underlying reader. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). 
+/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// @@ -180,15 +189,17 @@ impl Write for GzDecoder { } } -/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure exposes a [`Read`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// A gzip file consists of a series of "members" concatenated one after another. -/// MultiGzDecoder decodes all members of a file, while [GzDecoder] will only decode -/// the first member. Learn more -/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// diff --git a/src/gz/write.rs b/src/gz/write.rs index d1d309dc..030b38e5 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -166,14 +166,19 @@ impl Drop for GzEncoder { } } -/// A decoder for a gzip file with a single member. +/// A decoder for the first member of a [gzip file]. /// -/// This structure exposes a [`Write`] interface that will emit uncompressed data -/// to the underlying writer `W`. +/// This structure exposes a [`Write`] interface, receiving compressed data and +/// writing uncompressed data to the underlying writer. +/// +/// After decoding the first member of a gzip file, this writer will return XXX +/// to all subsequent writes. 
/// -/// This decoder only handles gzipped data with a single stream. -/// Use [`MultiGzDecoder`] for gzipped data with multiple streams. +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -377,13 +382,15 @@ impl Read for GzDecoder { /// A gzip streaming decoder that decodes a [gzip file] with multiple members. /// -/// A gzip file consists of a series of "members" concatenated one after another. -/// `MultiGzDecoder` decodes all members of a file, while [GzDecoder] will only decode -/// the first member. Learn more -/// [in the introduction](https://docs.rs/flate2/*/flate2/#About-multi-member-Gzip-files). +/// This structure exposes a [`Write`] interface that will consume compressed data and +/// write uncompressed data to the underlying writer. +/// +/// A gzip file consists of a series of *members* concatenated one after another. +/// `MultiGzDecoder` decodes all members of a file and writes them to the +/// underlying writer one after another. /// -/// This structure exposes a [`Write`] interface that will consume all gzip members -/// from the written buffers and write uncompressed data to the writer. +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 #[derive(Debug)] diff --git a/src/lib.rs b/src/lib.rs index 6cbb0063..c9590b92 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,13 +73,16 @@ //! If these are read with a [`GzDecoder`], only the first member will be consumed and //! the rest will silently be left alone, which can be surprising. //! -//! The [`MultiGzDecoder`] on the other hand will decode all *members* of `gzip` file -//! 
into one consecutive stream of bytes, which hides the underlying *members* entirely -//! while failing if the file does not contain solely `gzip` *members*. -//! -//! It's worth noting that major browser like Chrome, Firefox as well as tool like `curl` -//! will only decode the first member of a `gzip` encoded reply, so what's right to do -//! truly depends on the context, as well the expected input of the library or application. +//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file +//! into one consecutive stream of bytes, which hides the underlying *members* entirely. +//! If a file contains non-gzip data after the gzip data, MultiGzDecoder will +//! emit an error after decoding the gzip data. This behavior matches the `gzip`, +//! `gunzip`, and `zcat` command line tools. +//! +//! Chrome and Firefox appear to implement behavior like `GzDecoder`, ignoring data +//! after the first member. `curl` appears to implement behavior somewhat like +//! `GzDecoder`, only decoding the first member, but emitting an error if there is +//! data after the first member, whether or not it is gzip data. //! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html From f0bf8a6516936faf65b5a4ad856465d9c5ad9b95 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 30 Jul 2023 09:29:17 +0200 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: jongiddy --- src/gz/bufread.rs | 13 ++++++------- src/gz/read.rs | 10 +++++----- src/gz/write.rs | 7 ++++--- src/lib.rs | 6 ++++-- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 3a0cda8f..24634e30 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -167,15 +167,14 @@ impl Write for GzEncoder { } } -/// A decoder for the first member of a [gzip file]. +/// A decoder for a single member of a [gzip file]. 
/// /// This structure exposes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. /// -/// After reading the first member of a gzip file (which is often, but not -/// always, the only member), this reader will return Ok(0) even if there -/// are more bytes available in the underlying reader. If you want to be sure -/// not to drop bytes on the floor, call `into_inner()` after Ok(0) to +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// If you need the following bytes, call `into_inner()` after Ok(0) to /// recover the underlying reader. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] @@ -413,8 +412,8 @@ impl Write for GzDecoder { /// data from the underlying reader and emit uncompressed data. /// /// A gzip file consists of a series of *members* concatenated one after another. -/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the -/// underlying reader does. +/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the +/// underlying reader does. For a file, this reads to the end of the file. /// /// To handle members seperately, see [GzDecoder] or read more /// [in the introduction](../index.html#about-multi-member-gzip-files). diff --git a/src/gz/read.rs b/src/gz/read.rs index adc9cda6..6368509a 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -95,11 +95,11 @@ impl Write for GzEncoder { /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. /// -/// After reading the first member of a gzip file (which is often, but not -/// always, the only member), this reader will return Ok(0) even if there -/// are more bytes available in the underlying reader. 
If you want to be sure -/// not to drop bytes on the floor, call `into_inner()` after Ok(0) to -/// recover the underlying reader. +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// `GzDecoder` may have read additional bytes past the end of the gzip data. +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// and use `bufread::GzDecoder` instead. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] /// or read more diff --git a/src/gz/write.rs b/src/gz/write.rs index 030b38e5..4184c855 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -166,13 +166,14 @@ impl Drop for GzEncoder { } } -/// A decoder for the first member of a [gzip file]. +/// A decoder for a single member of a [gzip file]. /// /// This structure exposes a [`Write`] interface, receiving compressed data and /// writing uncompressed data to the underlying writer. /// -/// After decoding the first member of a gzip file, this writer will return XXX -/// to all subsequent writes. +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// handle any data following the gzip member. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] /// or read more diff --git a/src/lib.rs b/src/lib.rs index c9590b92..018dc40a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,8 +70,10 @@ //! While most `gzip` files one encounters will have a single *member* that can be read //! with the [`GzDecoder`], there may be some files which have multiple members. //! -//! If these are read with a [`GzDecoder`], only the first member will be consumed and -//! the rest will silently be left alone, which can be surprising. +//! 
A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly +//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate +//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder` +//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled. //! //! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file //! into one consecutive stream of bytes, which hides the underlying *members* entirely. From fc30d9e24bffad84eba0d8bcc046e594126398a5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 30 Jul 2023 09:29:37 +0200 Subject: [PATCH 8/8] remove introductory paragraph that described other tools unrelated to `flate2` --- src/gz/bufread.rs | 4 ++-- src/gz/read.rs | 2 +- src/gz/write.rs | 4 ++-- src/lib.rs | 9 ++------- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 24634e30..e65c2eb6 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -172,8 +172,8 @@ impl Write for GzEncoder { /// This structure exposes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. /// -/// After reading a single member of the gzip data this reader will return -/// Ok(0) even if there are more bytes available in the underlying reader. +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. /// If you need the following bytes, call `into_inner()` after Ok(0) to /// recover the underlying reader. /// diff --git a/src/gz/read.rs b/src/gz/read.rs index 6368509a..2f923731 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -98,7 +98,7 @@ impl Write for GzEncoder { /// After reading a single member of the gzip data this reader will return /// Ok(0) even if there are more bytes available in the underlying reader. 
/// `GzDecoder` may have read additional bytes past the end of the gzip data. -/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` /// and use `bufread::GzDecoder` instead. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] diff --git a/src/gz/write.rs b/src/gz/write.rs index 4184c855..feda221e 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -171,8 +171,8 @@ impl Drop for GzEncoder { /// This structure exposes a [`Write`] interface, receiving compressed data and /// writing uncompressed data to the underlying writer. /// -/// After decoding a single member of the gzip data this writer will return the number of bytes up to -/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to /// handle any data following the gzip member. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] diff --git a/src/lib.rs b/src/lib.rs index 018dc40a..127e2354 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,8 +70,8 @@ //! While most `gzip` files one encounters will have a single *member* that can be read //! with the [`GzDecoder`], there may be some files which have multiple members. //! -//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly -//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate +//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly +//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate //! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder` //! 
and `write::GzDecoder` also allow non-gzip data following gzip data to be handled. //! @@ -81,11 +81,6 @@ //! emit an error after decoding the gzip data. This behavior matches the `gzip`, //! `gunzip`, and `zcat` command line tools. //! -//! Chrome and Firefox appear to implement behavior like `GzDecoder`, ignoring data -//! after the first member. `curl` appears to implement behavior somewhat like -//! `GzDecoder`, only decoding the first member, but emitting an error if there is -//! data after the first member, whether or not it is gzip data. -//! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html //! [`write`]: write/index.html