coreos · bgilbert · Dec 14, 2021 · Dec 9, 2021 · Dec 9, 2021 · Dec 9, 2021
diff --git a/fixtures/initrd/compressed-img.gen.sh b/fixtures/initrd/compressed-img.gen.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Regenerate fixtures/initrd/compressed.img.xz.
+#
+# Builds four CPIO archives (uncompressed, gzip, xz, uncompressed),
+# concatenates them into a single image, and xz-compresses the result.
+# The result is moved into the current directory, so run this from
+# fixtures/initrd/.
+
+set -euo pipefail
+
+dir=$(mktemp -d)
+# Single quotes defer expansion until the trap fires; the inner quotes
+# guard against whitespace in the temporary path.
+trap 'rm -r "$dir"' EXIT
+pushd "$dir"
+
+# make NAME: create NAME.cpio containing NAME/hello and NAME/world
+make() {
+    mkdir "$1"
+    echo HELLO > "$1/hello"
+    echo WORLD > "$1/world"
+    find "$1" | cpio -o -H newc -O "$1.cpio"
+}
+
+make uncompressed-1
+make uncompressed-2
+
+make gzip
+gzip -9 gzip.cpio
+
+make xz
+xz -9 xz.cpio
+
+cat uncompressed-1.cpio gzip.cpio.gz xz.cpio.xz uncompressed-2.cpio > compressed.img
+xz -9 compressed.img
+popd
+mv "$dir/compressed.img.xz" .
diff --git a/fixtures/initrd/compressed.img.xz b/fixtures/initrd/compressed.img.xz
diff --git a/fixtures/initrd/redundant.gen.sh b/fixtures/initrd/redundant.gen.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Regenerate fixtures/initrd/redundant.img.xz.
+#
+# Builds three CPIO archives that each contain data/file with different
+# contents, concatenates them in order (first, second, third), and
+# xz-compresses the result.  The result is moved into the current directory.
+
+set -euo pipefail
+
+dir=$(mktemp -d)
+# Single quotes defer expansion until the trap fires; the inner quotes
+# guard against whitespace in the temporary path.
+trap 'rm -r "$dir"' EXIT
+pushd "$dir"
+
+# make NAME: create NAME.cpio whose data/file contains NAME
+make() {
+    mkdir -p data
+    echo "$1" > data/file
+    find data | cpio -o -H newc -O "$1.cpio"
+}
+
+make first
+make second
+make third
+
+cat first.cpio second.cpio third.cpio | xz -9c > redundant.img.xz
+popd
+mv "$dir/redundant.img.xz" .
diff --git a/fixtures/initrd/redundant.img.xz b/fixtures/initrd/redundant.img.xz
diff --git a/src/cmdline.rs b/src/cmdline.rs
@@ -602,9 +602,9 @@ pub struct PxeIgnitionWrapConfig {
 
 #[derive(Debug, StructOpt)]
 pub struct PxeIgnitionUnwrapConfig {
-    /// initrd image
+    /// initrd image [default: stdin]
     #[structopt(value_name = "initrd")]
-    pub input: String,
+    pub input: Option<String>,
 }
 
 impl FromStr for FetchRetries {

diff --git a/src/io/compress.rs b/src/io/compress.rs
@@ -15,58 +15,96 @@
 use anyhow::{Context, Result};
 use flate2::bufread::GzDecoder;
 use std::io::{self, BufRead, ErrorKind, Read};
-use xz2::bufread::XzDecoder;
+
+use crate::io::XzStreamDecoder;
 
 enum CompressDecoder<R: BufRead> {
     Uncompressed(R),
     Gzip(GzDecoder<R>),
-    Xz(XzDecoder<R>),
+    Xz(XzStreamDecoder<R>),
 }
 
 pub struct DecompressReader<R: BufRead> {
     decoder: CompressDecoder<R>,
+    /// If set, data following the compressed stream is not an error.
+    allow_trailing: bool,
 }
 
 /// Format-sniffing decompressor
 impl<R: BufRead> DecompressReader<R> {
-    pub fn new(mut source: R) -> Result<Self> {
+    /// Wrap `source`; reads fail if data follows a compressed stream.
+    pub fn new(source: R) -> Result<Self> {
+        Self::new_full(source, false)
+    }
+
+    /// Wrap `source`, tolerating data after a compressed stream.
+    pub fn for_concatenated(source: R) -> Result<Self> {
+        Self::new_full(source, true)
+    }
+
+    /// Pick a decoder from the magic number at the start of `source`.
+    fn new_full(mut source: R, allow_trailing: bool) -> Result<Self> {
         use CompressDecoder::*;
         let sniff = source.fill_buf().context("sniffing input")?;
-        let decoder = if sniff.len() > 2 && &sniff[0..2] == b"\x1f\x8b" {
+        let decoder = if sniff.starts_with(b"\x1f\x8b") {
             Gzip(GzDecoder::new(source))
-        } else if sniff.len() > 6 && &sniff[0..6] == b"\xfd7zXZ\x00" {
-            Xz(XzDecoder::new(source))
+        } else if sniff.starts_with(b"\xfd7zXZ\x00") {
+            Xz(XzStreamDecoder::new(source))
         } else {
             Uncompressed(source)
         };
-        Ok(Self { decoder })
+        Ok(Self {
+            decoder,
+            allow_trailing,
+        })
+    }
+
+    /// Consume the reader and return the underlying source.
+    pub fn into_inner(self) -> R {
+        match self.decoder {
+            CompressDecoder::Uncompressed(d) => d,
+            CompressDecoder::Gzip(d) => d.into_inner(),
+            CompressDecoder::Xz(d) => d.into_inner(),
+        }
+    }
+
+    /// Mutably borrow the underlying source.
+    pub fn get_mut(&mut self) -> &mut R {
+        match &mut self.decoder {
+            CompressDecoder::Uncompressed(d) => d,
+            CompressDecoder::Gzip(d) => d.get_mut(),
+            CompressDecoder::Xz(d) => d.get_mut(),
+        }
+    }
+
+    /// Whether a gzip or xz stream was detected in the source.
+    pub fn compressed(&self) -> bool {
+        matches!(self.decoder, CompressDecoder::Gzip(_) | CompressDecoder::Xz(_))
     }
 }
 
 impl<R: BufRead> Read for DecompressReader<R> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
         use CompressDecoder::*;
-        match &mut self.decoder {
-            Uncompressed(d) => d.read(buf),
-            Gzip(d) => {
-                let count = d.read(buf)?;
-                if count == 0 {
-                    // GzDecoder stops reading as soon as it encounters the
-                    // gzip trailer, so it doesn't notice trailing data,
-                    // which indicates something wrong with the input.  Try
-                    // reading one more byte, and fail if there is one.
-                    let mut buf = [0; 1];
-                    if d.get_mut().read(&mut buf)? > 0 {
-                        return Err(io::Error::new(
-                            ErrorKind::InvalidData,
-                            "found trailing data after compressed gzip stream",
-                        ));
-                    }
-                }
-                Ok(count)
+        let count = match &mut self.decoder {
+            Uncompressed(d) => d.read(buf)?,
+            Gzip(d) => d.read(buf)?,
+            Xz(d) => d.read(buf)?,
+        };
+        if count == 0 && !buf.is_empty() && self.compressed() && !self.allow_trailing {
+            // The compressed stream is finished (count is 0 although the
+            // caller asked for data).  Decompressors stop at the compression
+            // trailer without noticing what follows, and trailing data means
+            // the input is wrong: probe for one more byte and fail if found.
+            let mut buf = [0; 1];
+            if self.get_mut().read(&mut buf)? > 0 {
+                return Err(io::Error::new(
+                    ErrorKind::InvalidData,
+                    "found trailing data after compressed stream",
+                ));
             }
-            Xz(d) => d.read(buf),
         }
+        Ok(count)
     }
 }
 
@@ -103,5 +141,15 @@ mod tests {
             .unwrap()
             .read_to_end(&mut output)
             .unwrap_err();
+
+        // use concatenated mode, make sure we ignore trailing garbage
+        let mut reader = BufReader::new(&*input);
+        DecompressReader::for_concatenated(&mut reader)
+            .unwrap()
+            .read_to_end(&mut output)
+            .unwrap();
+        let mut remainder = Vec::new();
+        reader.read_to_end(&mut remainder).unwrap();
+        assert_eq!(&remainder, &[0]);
     }
 }
diff --git a/src/io/initrd.rs b/src/io/initrd.rs
@@ -0,0 +1,157 @@
+// Copyright 2019 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use anyhow::{Context, Result};
+use cpio::{write_cpio, NewcBuilder, NewcReader};
+use std::io::{BufRead, BufReader, Cursor, Read};
+use xz2::stream::{Check, Stream};
+use xz2::write::XzEncoder;
+
+use crate::io::*;
+
+/// Make an xz-compressed initrd containing the specified members.
+///
+/// `members` is a list of `(path, contents)` pairs.  Each pair becomes a
+/// regular file with mode 0644 in one CPIO archive, and the xz-compressed
+/// archive is returned.
+pub fn make_initrd(members: &[(&str, &[u8])]) -> Result<Vec<u8>> {
+    // kernel requires CRC32: https://www.kernel.org/doc/Documentation/xz.txt
+    let stream = Stream::new_easy_encoder(9, Check::Crc32).context("creating XZ encoder")?;
+    let mut compressor = XzEncoder::new_stream(Vec::new(), stream);
+    // S_IFREG | 0644
+    let entries = members.iter().map(|(path, contents)| {
+        let header = NewcBuilder::new(path).mode(0o100_644);
+        (header, Cursor::new(*contents))
+    });
+    write_cpio(entries, &mut compressor).context("writing CPIO archive")?;
+    compressor.finish().context("closing XZ compressor")
+}
+
+/// Return the contents of `path` from a series of CPIO archives, each
+/// optionally gzip/xz-compressed; the last match wins, `None` if absent.
+pub fn extract_initrd<R: Read>(source: R, path: &str) -> Result<Option<Vec<u8>>> {
+    let mut source = BufReader::with_capacity(BUFFER_SIZE, source);
+    let mut result: Option<Vec<u8>> = None;
+    // each iteration handles one archive; stop once the source is drained
+    while !source
+        .fill_buf()
+        .context("checking for data in initrd")?
+        .is_empty()
+    {
+        // sniff the archive's compression, then walk its entries to the trailer
+        let mut decompressor = DecompressReader::for_concatenated(source)?;
+        loop {
+            let mut reader = NewcReader::new(decompressor).context("reading CPIO entry")?;
+            let entry = reader.entry();
+            if entry.is_trailer() {
+                decompressor = reader.finish().context("finishing reading CPIO trailer")?;
+                break;
+            }
+            if entry.name() == path {
+                let mut buf = Vec::with_capacity(entry.file_size() as usize);
+                reader
+                    .read_to_end(&mut buf)
+                    .context("reading CPIO entry contents")?;
+                result = Some(buf);
+            }
+            decompressor = reader.finish().context("finishing reading CPIO entry")?;
+        }
+
+        // drain the rest of a compressed stream, then take back the source
+        if decompressor.compressed() {
+            let mut trailing = Vec::new();
+            decompressor
+                .read_to_end(&mut trailing)
+                .context("finishing reading compressed archive")?;
+            // zero padding after the trailer is okay; other data is not
+            if trailing.iter().any(|v| *v != 0) {
+                bail!("found trailing garbage inside compressed archive");
+            }
+        }
+        source = decompressor.into_inner();
+
+        // skip zero bytes up to the next archive (or EOF)
+        loop {
+            let buf = source
+                .fill_buf()
+                .context("checking for padding in initrd")?;
+            if buf.is_empty() {
+                // EOF
+                break;
+            }
+            match buf.iter().position(|v| *v != 0) {
+                Some(pos) => {
+                    source.consume(pos);
+                    break;
+                }
+                None => {
+                    let len = buf.len();
+                    source.consume(len);
+                }
+            }
+        }
+    }
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use xz2::read::XzDecoder;
+
+    /// Decompress an xz-compressed fixture into memory; the fixtures are
+    /// checked in with an outer layer of xz compression.
+    fn unxz(fixture: &[u8]) -> Vec<u8> {
+        let mut data: Vec<u8> = Vec::new();
+        XzDecoder::new(fixture).read_to_end(&mut data).unwrap();
+        data
+    }
+
+    /// A member written by make_initrd() is returned unchanged by
+    /// extract_initrd().
+    #[test]
+    fn test_cpio_roundtrip() {
+        let input = r#"{}"#;
+        let cpio = make_initrd(&[("z", input.as_bytes())]).unwrap();
+        let output = extract_initrd(&*cpio, "z").unwrap().unwrap();
+        assert_eq!(input.as_bytes(), output.as_slice());
+    }
+
+    /// Members are found in every archive of an image that mixes
+    /// uncompressed, gzip, and xz archives; unknown paths yield None.
+    #[test]
+    fn test_cpio_compression() {
+        let archive = unxz(&include_bytes!("../../fixtures/initrd/compressed.img.xz")[..]);
+        // every archive holds <dir>/hello and <dir>/world
+        for dir in &["uncompressed-1", "gzip", "xz", "uncompressed-2"] {
+            for (name, expected) in &[("hello", b"HELLO\n"), ("world", b"WORLD\n")] {
+                let member = format!("{}/{}", dir, name);
+                let found = extract_initrd(&*archive, &member).unwrap().unwrap();
+                assert_eq!(found, *expected);
+            }
+        }
+        // a path present in no archive
+        assert!(extract_initrd(&*archive, "z").unwrap().is_none());
+    }
+
+    /// Check that the last copy of a file in an archive wins, which is
+    /// how the kernel behaves.
+    #[test]
+    fn test_cpio_redundancy() {
+        let archive = unxz(&include_bytes!("../../fixtures/initrd/redundant.img.xz")[..]);
+        // data/file is written by all three concatenated archives
+        let found = extract_initrd(&*archive, "data/file").unwrap().unwrap();
+        assert_eq!(found, b"third\n");
+    }
+}
diff --git a/src/io/mod.rs b/src/io/mod.rs
@@ -19,14 +19,18 @@ use std::io::{ErrorKind, Read, Write};
 mod bls;
 mod compress;
 mod hash;
+mod initrd;
 mod limit;
 mod verify;
+mod xz;
 
 pub use self::bls::*;
 pub use self::compress::*;
 pub use self::hash::*;
+pub use self::initrd::*;
 pub use self::limit::*;
 pub use self::verify::*;
+pub use self::xz::*;
 
 // The default BufReader/BufWriter buffer size is 8 KiB, which isn't large
 // enough to fully amortize system call overhead.