From bab8a77f0e32201541f740d3c93830e6f9eafaa8 Mon Sep 17 00:00:00 2001
From: Benjamin Gilbert
Date: Sat, 11 Dec 2021 22:14:21 -0500
Subject: [PATCH 1/2] Optionally allow trailing data in bufread::XzDecoder

Some xz streams have unrelated data afterward. In particular, Linux
kernel initrd files are the concatenation of multiple cpio archives,
each of which can be compressed with a different compressor.

read::XzDecoder and bufread::XzDecoder return InvalidData in this case,
which makes it difficult to detect the EOF, unwrap the underlying
stream, and continue reading with a different decompressor.
(write::XzDecoder returns Ok(0) after the end of the xz stream, which is
less ambiguous.) Multi-decoder mode doesn't address this, since that
only handles the case where the following data is also an xz stream.

liblzma properly returns StreamEnd here; we just need to detect it.
However, the xz test suite contains some tests with trailing garbage,
and the xz command-line tool is designed to fail on those unless
--single-stream is specified. For compatibility, we probably can't
allow trailing garbage by default, but we can provide an option.

Add an allow_trailing_data() toggle to bufread::XzDecoder, and stop
accepting bytes in read() if we reach StreamEnd with that toggle
enabled. Do not add a similar option to read::XzDecoder, since it's
only useful if the underlying stream is synced to the end of the xz
stream afterward, and read::XzDecoder can't ensure that.
---
 src/bufread.rs | 53 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/bufread.rs b/src/bufread.rs
index 44f65839..bfc747a1 100644
--- a/src/bufread.rs
+++ b/src/bufread.rs
@@ -27,6 +27,7 @@ pub struct XzEncoder<R> {
 pub struct XzDecoder<R> {
     obj: R,
     data: Stream,
+    allow_trailing_data: bool,
 }
 
 impl<R: BufRead> XzEncoder<R> {
@@ -163,11 +164,21 @@ impl<R: BufRead> XzDecoder<R> {
         XzDecoder {
             obj: r,
             data: stream,
+            allow_trailing_data: false,
         }
     }
 }
 
 impl<R> XzDecoder<R> {
+    /// Configures whether to allow trailing data after the XZ stream.
+    ///
+    /// If true, additional data after the compressed XZ stream is ignored
+    /// and can be read from the underlying stream afterward. If false,
+    /// such data produces an error. Defaults to false.
+    pub fn allow_trailing_data(&mut self, allow: bool) {
+        self.allow_trailing_data = allow;
+    }
+
     /// Acquires a reference to the underlying stream
     pub fn get_ref(&self) -> &R {
         &self.obj
@@ -227,6 +238,9 @@ impl<R: BufRead> Read for XzDecoder<R> {
                 }
                 return Ok(read);
             }
+            if self.allow_trailing_data && status == Status::StreamEnd {
+                return Ok(read);
+            }
             if consumed == 0 {
                 return Err(io::Error::new(
                     io::ErrorKind::InvalidData,
@@ -284,21 +298,30 @@ mod tests {
         decoder_input.extend(&additional_data);
 
         // Decoder must be able to read the compressed xz stream, and keep the trailing data.
-        let mut decoder_reader = &decoder_input[..];
-        {
-            let mut decoder = XzDecoder::new(&mut decoder_reader);
-            let mut decompressed_data = vec![0u8; to_compress.len()];
-
-            assert_eq!(
-                decoder.read(&mut decompressed_data).unwrap(),
-                COMPRESSED_ORIG_SIZE
-            );
-            assert_eq!(decompressed_data, &to_compress[..]);
-        }
+        for allow_trailing_data in [false, true] {
+            let mut decoder_reader = &decoder_input[..];
+            {
+                let mut decoder = XzDecoder::new(&mut decoder_reader);
+                decoder.allow_trailing_data(allow_trailing_data);
+                let mut decompressed_data = vec![0u8; to_compress.len() + 20];
+
+                assert_eq!(
+                    decoder.read(&mut decompressed_data).unwrap(),
+                    COMPRESSED_ORIG_SIZE
+                );
+                assert_eq!(&decompressed_data[..COMPRESSED_ORIG_SIZE], &to_compress[..]);
+
+                if allow_trailing_data {
+                    assert_eq!(decoder.read(&mut decompressed_data).unwrap(), 0);
+                } else {
+                    decoder.read(&mut decompressed_data).unwrap_err();
+                }
+            }
 
-        let mut remaining_data = Vec::new();
-        let nb_read = decoder_reader.read_to_end(&mut remaining_data).unwrap();
-        assert_eq!(nb_read, ADDITIONAL_SIZE);
-        assert_eq!(remaining_data, &additional_data[..]);
+            let mut remaining_data = Vec::new();
+            let nb_read = decoder_reader.read_to_end(&mut remaining_data).unwrap();
+            assert_eq!(nb_read, ADDITIONAL_SIZE);
+            assert_eq!(remaining_data, &additional_data[..]);
+        }
     }
 }

From 0c3a4d6498318998edef3f2e6072ea3747bf6473 Mon Sep 17 00:00:00 2001
From: Benjamin Gilbert
Date: Sat, 11 Dec 2021 22:18:01 -0500
Subject: [PATCH 2/2] Add trailing data test for write::XzDecoder

Verify that write::XzDecoder refuses to accept additional bytes after
the xz stream reaches StreamEnd.
---
 src/write.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/write.rs b/src/write.rs
index 9ffad238..bdaefb11 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -344,6 +344,20 @@ mod tests {
         assert_eq!(&data[..], b"");
     }
 
+    #[test]
+    fn trailing_data() {
+        let mut c = XzEncoder::new(Vec::new(), 6);
+        c.write_all(b"12834").unwrap();
+        let mut compressed = c.finish().unwrap();
+        compressed.extend(b"asdf");
+        let mut d = XzDecoder::new(Vec::new());
+        assert_eq!(d.write(&compressed).unwrap(), compressed.len() - 4);
+        assert_eq!(d.write(b"asdf").unwrap(), 0);
+        assert_eq!(d.write(b"asdf").unwrap(), 0);
+        let data = d.finish().unwrap();
+        assert_eq!(&data, b"12834");
+    }
+
     #[test]
     fn qc() {
         ::quickcheck::quickcheck(test as fn(_) -> _);