Nom based parser #4

Closed · wants to merge 4 commits
4 changes: 4 additions & 0 deletions Cargo.toml
@@ -11,3 +11,7 @@ readme = "README.md"
[dependencies]
failure = "0.1.1"
byteorder = "1.2.1"

[dependencies.nom]
version = "~4.0.0"
features = ["verbose-errors"]
14 changes: 14 additions & 0 deletions benches/parser.rs
@@ -0,0 +1,14 @@
#![feature(test)]
extern crate test;

extern crate sleep_parser;

use sleep_parser::Header;
use test::Bencher;

const HEADER: &[u8; 32] = b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";

#[bench]
fn header_parsing(b: &mut Bencher) {
    b.iter(|| Header::from_bytes(HEADER));
}
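
For context, the HEADER fixture above decodes as: the three magic bytes 05 02 57, file type 01 (signatures), protocol version 00, big-endian entry size 00 28 (40), algorithm name length 07, the name "BLAKE2b", and zero padding up to 32 bytes. Note that #![feature(test)] and test::Bencher are nightly-only, so the benchmark runs via cargo +nightly bench. As a rough stable-channel sketch, assuming nothing beyond the public Header::from_bytes API introduced in this PR, the same call can be exercised in an ordinary integration test:

extern crate sleep_parser;

use sleep_parser::Header;

#[test]
fn parses_fixture_header() {
    // Same layout as the HEADER fixture: magic, file type 1 (signatures),
    // version 0, entry size 40, the 7 byte name "BLAKE2b", zero padded to 32 bytes.
    let mut fixture = vec![5u8, 2, 87, 1, 0, 0, 40, 7];
    fixture.extend_from_slice(b"BLAKE2b");
    fixture.resize(32, 0);
    assert!(Header::from_bytes(&fixture).is_ok());
}
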
130 changes: 46 additions & 84 deletions src/header.rs
@@ -1,8 +1,9 @@
extern crate byteorder;

use self::byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use self::byteorder::{BigEndian, WriteBytesExt};
use failure::Error;
use std::io::Cursor;
use nom;
use parsers;

/// Algorithm used for hashing the data.
#[derive(Debug, PartialEq)]
@@ -42,14 +43,14 @@ pub enum FileType {
}

/// SLEEP Protocol version.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum ProtocolVersion {
/// The version specified as per the paper released in 2017-09.
V0,
}

/// Structural representation of 32 byte SLEEP headers.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub struct Header {
/// Type of file.
pub file_type: FileType,
@@ -76,87 +77,15 @@ impl Header {
}
}

/// Parse a 32 bit buffer slice into a valid Header.
pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> {
ensure!(buffer.len() == 32, "buffer should be 32 bytes");

let mut rdr = Cursor::new(buffer);
let byte = rdr.read_u8().unwrap();
ensure!(
byte == 5,
format!(
"The first byte of a SLEEP header should be '5', found {}",
byte
)
);

let byte = rdr.read_u8().unwrap();
ensure!(
byte == 2,
format!(
"The second byte of a SLEEP header should be '2', found {}",
byte
)
);

let byte = rdr.read_u8().unwrap();
ensure!(
byte == 87,
format!(
"The third byte of a SLEEP header should be '87', found {}",
byte
)
);

let file_type = match rdr.read_u8().unwrap() {
0 => FileType::BitField,
1 => FileType::Signatures,
2 => FileType::Tree,
num => bail!(format!(
"The fourth byte '{}' does not belong to any known SLEEP file type",
num
)),
};

let protocol_version = match rdr.read_u8().unwrap() {
0 => ProtocolVersion::V0,
num => bail!(format!(
"The fifth byte '{}' does not belong to any known SLEEP protocol protocol_version",
num
)),
};

// Read entry size which will inform how many bytes to read next.
let entry_size = rdr.read_u16::<BigEndian>().unwrap();

// Read out the "entry_size" bytes into a string.
// NOTE(yw): there should be a more concise way of doing this.
let hash_name_len = rdr.read_u8().unwrap() as usize;
let current = rdr.position() as usize;

let hash_name_upper = current + hash_name_len;
let buf_slice = &buffer[current..hash_name_upper];
rdr.set_position(hash_name_upper as u64 + 1);
let algo = ::std::str::from_utf8(buf_slice)
.expect("The algorithm string was invalid utf8 encoded");

let hash_type = match algo {
"BLAKE2b" => HashType::BLAKE2b,
"Ed25519" => HashType::Ed25519,
_ => HashType::None,
};

for index in rdr.position()..32 {
let byte = rdr.read_u8().unwrap();
ensure!(byte == 0, format!("The remainder of the header should be zero-filled. Found byte '{}' at position '{}'.", byte, index));
}
/// Parses a 32 byte buffer slice into a valid Header.
pub fn from_bytes(buf: &[u8]) -> Result<Header, Error> {
convert_nom_result(buf, parsers::header(buf))
}

Ok(Header {
protocol_version,
entry_size,
file_type,
hash_type,
})
/// Parse a 32 byte buffer slice into a valid Header.
#[deprecated(note = "Use from_bytes")]
pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> {
Header::from_bytes(buffer)
}

/// Convert a `Header` into a `Vec<u8>`. Use this to persist a header back to
@@ -215,3 +144,36 @@ impl Header {
&& self.hash_type == HashType::BLAKE2b
}
}

fn convert_nom_result(
    buf: &[u8],
    result: Result<(&[u8], Header), nom::Err<&[u8]>>,
) -> Result<Header, Error> {
    match result {
        Ok((&[], h)) => Ok(h),
        Ok((remaining, _)) => {
            assert!(
                buf.len() > parsers::HEADER_LENGTH,
                "broken parser: input length is {}, but got unparsed input of length {}",
                buf.len(),
                remaining.len()
            );
            Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
        }
        Err(e @ nom::Err::Incomplete(_)) => {
            assert!(
                buf.len() < parsers::HEADER_LENGTH,
                "broken parser: input length is {}, but got error: {:?}",
                buf.len(),
                e
            );
            Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
        }
        Err(nom::Err::Error(context)) => {
            Err(format_err!("nom error: {:?}", context.into_error_kind()))
        }
        Err(nom::Err::Failure(context)) => {
            Err(format_err!("nom failure: {:?}", context.into_error_kind()))
        }
    }
}
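
To illustrate how the new entry point is meant to be consumed, here is a small usage sketch (not part of the diff; it assumes only the public Header and FileType items shown above): Header::from_bytes expects exactly 32 bytes and returns either a parsed Header or a descriptive error produced by convert_nom_result.

extern crate sleep_parser;

use sleep_parser::{FileType, Header};

fn main() {
    // Build a well-formed 32 byte header: magic 05 02 57, file type 2 (tree),
    // protocol version 0, big-endian entry size 40, algorithm name length 7,
    // the name "BLAKE2b", then zero padding.
    let mut bytes = vec![5u8, 2, 87, 2, 0, 0, 40, 7];
    bytes.extend_from_slice(b"BLAKE2b");
    bytes.resize(32, 0);

    let header = Header::from_bytes(&bytes).unwrap();
    assert_eq!(header.file_type, FileType::Tree);

    // Buffers that are not exactly 32 bytes are rejected with
    // an "input must be 32 bytes" error.
    assert!(Header::from_bytes(&bytes[..16]).is_err());
}
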
3 changes: 3 additions & 0 deletions src/lib.rs
@@ -5,8 +5,11 @@

#[macro_use]
extern crate failure;
#[macro_use]
extern crate nom;

mod header;
mod parsers;

pub use header::*;

146 changes: 146 additions & 0 deletions src/parsers.rs
@@ -0,0 +1,146 @@
#![cfg_attr(feature = "cargo-clippy", allow(clippy))]

use header::*;
use nom::{be_u16, be_u8, rest};
use std::str;

pub(crate) const HEADER_LENGTH: usize = 32;
const VERIFY_TRAILING_ZEROS: bool = true;

named!(
    file_type<FileType>,
    switch!(be_u8,
        0 => value!(FileType::BitField) |
        1 => value!(FileType::Signatures) |
        2 => value!(FileType::Tree)
    )
);

named!(
    protocol_version<ProtocolVersion>,
    switch!(be_u8,
        0 => value!(ProtocolVersion::V0)
    )
);

named_args!(
    algorithm(len: u8)<HashType>,
    switch!(map_res!(take!(len), str::from_utf8),
        "BLAKE2b" => value!(HashType::BLAKE2b) |
        "Ed25519" => value!(HashType::Ed25519) |
        "" => value!(HashType::None)
    )
);

named!(
    pub header<Header>,
    flat_map!(
        take!(HEADER_LENGTH),
        do_parse!(
            tag!(b"\x05\x02\x57") >>
            file_type: file_type >>
            protocol_version: protocol_version >>
            entry_size: be_u16 >>

            algorithm_len: verify!(be_u8, |len: u8| len <= HEADER_LENGTH as u8 - 8) >>
            algorithm: apply!(algorithm, algorithm_len) >>

            verify!(rest, |bytes: &[u8]| {
                let header_consumed = bytes.len() + algorithm_len as usize + 8 == HEADER_LENGTH;
                let trailing_zeros = !VERIFY_TRAILING_ZEROS || bytes.iter().all(|&b| b == 0u8);
                header_consumed && trailing_zeros
            }) >>

            (Header {
                file_type,
                protocol_version,
                entry_size,
                hash_type: algorithm,
            })
        )
    )
);

#[cfg(test)]
mod test {
    use super::*;

    use nom;

    #[test]
    fn parse_file_type() {
        assert_eq!(file_type(b"\x00"), Ok((&[][..], FileType::BitField)));
        assert_eq!(file_type(b"\x01"), Ok((&[][..], FileType::Signatures)));
        assert_eq!(file_type(b"\x02"), Ok((&[][..], FileType::Tree)));
        assert!(file_type(b"\xff").is_err());
    }

    #[test]
    fn parse_header() {
        fn mk_header(prefix: &[u8]) -> [u8; 32] {
            let mut h = [0u8; 32];
            h[0..prefix.len()].clone_from_slice(prefix);
            h
        }

        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b")),
            Ok((
                &[][..],
                Header {
                    file_type: FileType::Signatures,
                    protocol_version: ProtocolVersion::V0,
                    entry_size: 40,
                    hash_type: HashType::BLAKE2b
                }
            ))
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b"))
                .unwrap()
                .1
                .hash_type,
            HashType::BLAKE2b
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07Ed25519"))
                .unwrap()
                .1
                .hash_type,
            HashType::Ed25519
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x00"))
                .unwrap()
                .1
                .hash_type,
            HashType::None
        );
        assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());
        assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());

        let h = b"\x05\x02W\x01\x00\x00\x28\x19BLAKE2bXXXXXXXXXXXXXXXXXX";
        assert!(header(h).is_err());
    }

    #[test]
    fn invalid_algorithm_len() {
        match header(b"\x05\x02W\x00\x00\x00\x00\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") {
            Err(nom::Err::Error(nom::Context::Code(_, nom::ErrorKind::Verify))) => (),
            x => panic!("{:?}", x),
        }
    }
}
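
For readers who have not used nom 4's macro combinators before, the following toy parser (a standalone sketch, not part of this crate; the record format and names are invented for illustration) uses the same named!, do_parse!, tag!, and verify! building blocks as the header parser above: it expects a two byte magic followed by a big-endian length capped at 1024.

#[macro_use]
extern crate nom;

use nom::be_u16;

// Toy record: the magic bytes AB CD followed by a big-endian u16 length.
named!(
    toy_record<u16>,
    do_parse!(
        tag!(b"\xAB\xCD") >>
        len: verify!(be_u16, |len: u16| len <= 1024) >>
        (len)
    )
);

fn main() {
    // 0xAB 0xCD magic, then 0x00 0x10 = 16.
    assert_eq!(toy_record(b"\xAB\xCD\x00\x10"), Ok((&[][..], 16u16)));
    // A wrong magic makes tag! fail, so the parser returns an error.
    assert!(toy_record(b"\xFF\xFF\x00\x10").is_err());
}
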