Nom based parser #4

Closed · wants to merge 4 commits
4 changes: 4 additions & 0 deletions Cargo.toml
@@ -11,3 +11,7 @@ readme = "README.md"
[dependencies]
failure = "0.1.1"
byteorder = "1.2.1"

[dependencies.nom]
version = "~4.0.0"
features = ["verbose-errors"]
14 changes: 14 additions & 0 deletions benches/parser.rs
@@ -0,0 +1,14 @@
#![feature(test)]
extern crate test;

extern crate sleep_parser;

use sleep_parser::Header;
use test::Bencher;

const HEADER: &[u8; 32] = b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";

#[bench]
fn header_parsing(b: &mut Bencher) {
    b.iter(|| Header::from_bytes(HEADER));
}
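
For context, the HEADER fixture above decodes as: the three magic bytes 05 02 57, file type 01 (signatures), protocol version 00, big-endian entry size 00 28 (40), algorithm name length 07, the name "BLAKE2b", and zero padding up to 32 bytes. Note that #![feature(test)] and test::Bencher are nightly-only, so the benchmark runs via cargo +nightly bench. As a rough stable-channel sketch, assuming nothing beyond the public Header::from_bytes API introduced in this PR, the same call can be exercised in an ordinary integration test:

extern crate sleep_parser;

use sleep_parser::Header;

#[test]
fn parses_fixture_header() {
    // Same layout as the HEADER fixture: magic, file type 1 (signatures),
    // version 0, entry size 40, the 7 byte name "BLAKE2b", zero padded to 32 bytes.
    let mut fixture = vec![5u8, 2, 87, 1, 0, 0, 40, 7];
    fixture.extend_from_slice(b"BLAKE2b");
    fixture.resize(32, 0);
    assert!(Header::from_bytes(&fixture).is_ok());
}
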
130 changes: 46 additions & 84 deletions src/header.rs
@@ -1,8 +1,9 @@
extern crate byteorder;

use self::byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use self::byteorder::{BigEndian, WriteBytesExt};
use failure::Error;
use std::io::Cursor;
use nom;
use parsers;

/// Algorithm used for hashing the data.
#[derive(Debug, PartialEq)]
@@ -42,14 +43,14 @@ pub enum FileType {
}

/// SLEEP Protocol version.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum ProtocolVersion {
/// The version specified as per the paper released in 2017-09.
V0,
}

/// Structural representation of 32 byte SLEEP headers.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub struct Header {
/// Type of file.
pub file_type: FileType,
@@ -76,87 +77,15 @@ impl Header {
}
}

/// Parse a 32 bit buffer slice into a valid Header.
pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> {
ensure!(buffer.len() == 32, "buffer should be 32 bytes");

let mut rdr = Cursor::new(buffer);
let byte = rdr.read_u8().unwrap();
ensure!(
byte == 5,
format!(
"The first byte of a SLEEP header should be '5', found {}",
byte
)
);

let byte = rdr.read_u8().unwrap();
ensure!(
byte == 2,
format!(
"The second byte of a SLEEP header should be '2', found {}",
byte
)
);

let byte = rdr.read_u8().unwrap();
ensure!(
byte == 87,
format!(
"The third byte of a SLEEP header should be '87', found {}",
byte
)
);

let file_type = match rdr.read_u8().unwrap() {
0 => FileType::BitField,
1 => FileType::Signatures,
2 => FileType::Tree,
num => bail!(format!(
"The fourth byte '{}' does not belong to any known SLEEP file type",
num
)),
};

let protocol_version = match rdr.read_u8().unwrap() {
0 => ProtocolVersion::V0,
num => bail!(format!(
"The fifth byte '{}' does not belong to any known SLEEP protocol protocol_version",
num
)),
};

// Read entry size which will inform how many bytes to read next.
let entry_size = rdr.read_u16::<BigEndian>().unwrap();

// Read out the "entry_size" bytes into a string.
// NOTE(yw): there should be a more concise way of doing this.
let hash_name_len = rdr.read_u8().unwrap() as usize;
let current = rdr.position() as usize;

let hash_name_upper = current + hash_name_len;
let buf_slice = &buffer[current..hash_name_upper];
rdr.set_position(hash_name_upper as u64 + 1);
let algo = ::std::str::from_utf8(buf_slice)
.expect("The algorithm string was invalid utf8 encoded");

let hash_type = match algo {
"BLAKE2b" => HashType::BLAKE2b,
"Ed25519" => HashType::Ed25519,
_ => HashType::None,
};

for index in rdr.position()..32 {
let byte = rdr.read_u8().unwrap();
ensure!(byte == 0, format!("The remainder of the header should be zero-filled. Found byte '{}' at position '{}'.", byte, index));
}
/// Parses a 32 byte buffer slice into a valid Header.
pub fn from_bytes(buf: &[u8]) -> Result<Header, Error> {
convert_nom_result(buf, parsers::header(buf))
}

Ok(Header {
protocol_version,
entry_size,
file_type,
hash_type,
})
/// Parse a 32 byte buffer slice into a valid Header.
#[deprecated(note = "Use from_bytes")]
pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> {
Header::from_bytes(buffer)
}

/// Convert a `Header` into a `Vec<u8>`. Use this to persist a header back to
@@ -215,3 +144,36 @@ impl Header {
&& self.hash_type == HashType::BLAKE2b
}
}

fn convert_nom_result(
    buf: &[u8],
    result: Result<(&[u8], Header), nom::Err<&[u8]>>,
) -> Result<Header, Error> {
    match result {
        Ok((&[], h)) => Ok(h),
        Ok((remaining, _)) => {
            assert!(
                buf.len() > parsers::HEADER_LENGTH,
                "broken parser: input length is {}, but got unparsed input of length {}",
                buf.len(),
                remaining.len()
            );
            Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
        }
        Err(e @ nom::Err::Incomplete(_)) => {
            assert!(
                buf.len() < parsers::HEADER_LENGTH,
                "broken parser: input length is {}, but got error: {:?}",
                buf.len(),
                e
            );
            Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
        }
        Err(nom::Err::Error(context)) => {
            Err(format_err!("nom error: {:?}", context.into_error_kind()))
        }
        Err(nom::Err::Failure(context)) => {
            Err(format_err!("nom failure: {:?}", context.into_error_kind()))
        }
    }
}
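
To illustrate how the new entry point is meant to be consumed, here is a small usage sketch (not part of the diff; it assumes only the public Header and FileType items shown above): Header::from_bytes expects exactly 32 bytes and returns either a parsed Header or a descriptive error produced by convert_nom_result.

extern crate sleep_parser;

use sleep_parser::{FileType, Header};

fn main() {
    // Build a well-formed 32 byte header: magic 05 02 57, file type 2 (tree),
    // protocol version 0, big-endian entry size 40, algorithm name length 7,
    // the name "BLAKE2b", then zero padding.
    let mut bytes = vec![5u8, 2, 87, 2, 0, 0, 40, 7];
    bytes.extend_from_slice(b"BLAKE2b");
    bytes.resize(32, 0);

    let header = Header::from_bytes(&bytes).unwrap();
    assert_eq!(header.file_type, FileType::Tree);

    // Buffers that are not exactly 32 bytes are rejected with
    // an "input must be 32 bytes" error.
    assert!(Header::from_bytes(&bytes[..16]).is_err());
}
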
3 changes: 3 additions & 0 deletions src/lib.rs
@@ -5,8 +5,11 @@

#[macro_use]
extern crate failure;
#[macro_use]
extern crate nom;

mod header;
mod parsers;

pub use header::*;

146 changes: 146 additions & 0 deletions src/parsers.rs
@@ -0,0 +1,146 @@
#![cfg_attr(feature = "cargo-clippy", allow(clippy))]

use header::*;
use nom::{be_u16, be_u8, rest};
use std::str;

pub(crate) const HEADER_LENGTH: usize = 32;
const VERIFY_TRAILING_ZEROS: bool = true;

named!(
    file_type<FileType>,
    switch!(be_u8,
        0 => value!(FileType::BitField) |
        1 => value!(FileType::Signatures) |
        2 => value!(FileType::Tree)
    )
);

named!(
    protocol_version<ProtocolVersion>,
    switch!(be_u8,
        0 => value!(ProtocolVersion::V0)
    )
);

named_args!(
    algorithm(len: u8)<HashType>,
    switch!(map_res!(take!(len), str::from_utf8),
        "BLAKE2b" => value!(HashType::BLAKE2b) |
        "Ed25519" => value!(HashType::Ed25519) |
        "" => value!(HashType::None)
    )
);

named!(
    pub header<Header>,
    flat_map!(
        take!(HEADER_LENGTH),
        do_parse!(
            tag!(b"\x05\x02\x57") >>
            file_type: file_type >>
            protocol_version: protocol_version >>
            entry_size: be_u16 >>

            algorithm_len: verify!(be_u8, |len: u8| len <= HEADER_LENGTH as u8 - 8) >>
            algorithm: apply!(algorithm, algorithm_len) >>

            verify!(rest, |bytes: &[u8]| {
                let header_consumed = bytes.len() + algorithm_len as usize + 8 == HEADER_LENGTH;
                let trailing_zeros = !VERIFY_TRAILING_ZEROS || bytes.iter().all(|&b| b == 0u8);
                header_consumed && trailing_zeros
            }) >>

            (Header {
                file_type,
                protocol_version,
                entry_size,
                hash_type: algorithm,
            })
        )
    )
);

#[cfg(test)]
mod test {
    use super::*;

    use nom;

    #[test]
    fn parse_file_type() {
        assert_eq!(file_type(b"\x00"), Ok((&[][..], FileType::BitField)));
        assert_eq!(file_type(b"\x01"), Ok((&[][..], FileType::Signatures)));
        assert_eq!(file_type(b"\x02"), Ok((&[][..], FileType::Tree)));
        assert!(file_type(b"\xff").is_err());
    }

    #[test]
    fn parse_header() {
        fn mk_header(prefix: &[u8]) -> [u8; 32] {
            let mut h = [0u8; 32];
            h[0..prefix.len()].clone_from_slice(prefix);
            h
        }

        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b")),
            Ok((
                &[][..],
                Header {
                    file_type: FileType::Signatures,
                    protocol_version: ProtocolVersion::V0,
                    entry_size: 40,
                    hash_type: HashType::BLAKE2b
                }
            ))
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b"))
                .unwrap()
                .1
                .hash_type,
            HashType::BLAKE2b
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x07Ed25519"))
                .unwrap()
                .1
                .hash_type,
            HashType::Ed25519
        );
        assert_eq!(
            header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x00"))
                .unwrap()
                .1
                .hash_type,
            HashType::None
        );
        assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());
        assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());

        let h = b"\x05\x02W\x01\x00\x00\x28\x19BLAKE2bXXXXXXXXXXXXXXXXXX";
        assert!(header(h).is_err());
    }

    #[test]
    fn invalid_algorithm_len() {
        match header(b"\x05\x02W\x00\x00\x00\x00\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") {
            Err(nom::Err::Error(nom::Context::Code(_, nom::ErrorKind::Verify))) => (),
            x => panic!("{:?}", x),
        }
    }
}
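
For readers who have not used nom 4's macro combinators before, the following toy parser (a standalone sketch, not part of this crate; the record format and names are invented for illustration) uses the same named!, do_parse!, tag!, and verify! building blocks as the header parser above: it expects a two byte magic followed by a big-endian length capped at 1024.

#[macro_use]
extern crate nom;

use nom::be_u16;

// Toy record: the magic bytes AB CD followed by a big-endian u16 length.
named!(
    toy_record<u16>,
    do_parse!(
        tag!(b"\xAB\xCD") >>
        len: verify!(be_u16, |len: u16| len <= 1024) >>
        (len)
    )
);

fn main() {
    // 0xAB 0xCD magic, then 0x00 0x10 = 16.
    assert_eq!(toy_record(b"\xAB\xCD\x00\x10"), Ok((&[][..], 16u16)));
    // A wrong magic makes tag! fail, so the parser returns an error.
    assert!(toy_record(b"\xFF\xFF\x00\x10").is_err());
}
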