From 1727b0028ef448925b49f1bc8592d9af54307d9e Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 24 Oct 2022 15:41:07 +0200 Subject: [PATCH 1/5] dynamic deserialisation example --- libafl/Cargo.toml | 5 +- libafl/src/inputs/bytes.rs | 44 ++++++++++++- libafl/src/inputs/encoded.rs | 2 + libafl/src/inputs/generalized.rs | 2 + libafl/src/inputs/gramatron.rs | 2 + libafl/src/inputs/mod.rs | 104 ++++++++++++++++++++++++++++++- 6 files changed, 155 insertions(+), 4 deletions(-) diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 8b9b375e56..3a1098d647 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -13,7 +13,7 @@ categories = ["development-tools::testing", "emulators", "embedded", "os", "no-s [features] default = ["std", "derive", "llmp_compression", "rand_trait", "fork", "prelude"] -std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds"] # print, env, launcher ... support +std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds", "inventory", "downcast-rs"] # print, env, launcher ... support derive = ["libafl_derive"] # provide derive(SerdeAny) macro. fork = [] # uses the fork() syscall to spawn children, instead of launching a new command, if supported by the OS (has no effect on Windows, no_std). rand_trait = ["rand_core"] # If set, libafl's rand implementations will implement `rand::Rng` @@ -88,6 +88,9 @@ z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for pyo3 = { version = "0.17", optional = true, features = ["serde", "macros"] } concat-idents = { version = "1.1.3", optional = true } +inventory = { version = "0.3.2", optional = true } +downcast-rs = { version = "1.2.0", optional = true } + # AGPL # !!! this create requires nightly grammartec = { version = "0.2", optional = true } diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index 9e3b8353ed..255c37d4b3 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -3,14 +3,17 @@ use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec}; use core::{cell::RefCell, convert::From, hash::Hasher}; +use std::prelude::rust_2015::Box; #[cfg(feature = "std")] use std::{fs::File, io::Read, path::Path}; use ahash::AHasher; +#[cfg(feature = "std")] +use postcard::{de_flavors::Slice, Deserializer}; use serde::{Deserialize, Serialize}; #[cfg(feature = "std")] -use crate::{bolts::fs::write_file_atomic, Error}; +use crate::{bolts::fs::write_file_atomic, bolts::AsSlice, inputs::ConvertibleInput, Error}; use crate::{ bolts::{ownedref::OwnedSlice, HasLen}, inputs::{HasBytesVec, HasTargetBytes, Input}, @@ -24,6 +27,8 @@ pub struct BytesInput { } impl Input for BytesInput { + const NAME: &'static str = "BytesInput"; + #[cfg(feature = "std")] /// Write this input to the file fn to_file

(&self, path: P) -> Result<(), Error> @@ -53,6 +58,24 @@ impl Input for BytesInput { } } +/// Dynamic deserialisation of any input type that has target bytes +#[cfg(feature = "std")] +pub fn target_bytes_to_bytes Deserialize<'a>>( + buf: &[u8], +) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> +{ + let orig: I = postcard::from_bytes(buf)?; + Ok(Box::new(BytesInput { + bytes: orig.target_bytes().as_slice().to_vec(), + })) +} + +#[cfg(feature = "std")] +inventory::submit! { + use crate::inputs::{GeneralizedInput, InputConversion}; + InputConversion::new(GeneralizedInput::NAME, BytesInput::NAME, target_bytes_to_bytes::) +} + /// Rc Ref-cell from Input impl From for Rc> { fn from(input: BytesInput) -> Self { @@ -105,3 +128,22 @@ impl BytesInput { Self { bytes } } } + +#[cfg(test)] +mod test { + use alloc::vec::Vec; + + use crate::{ + bolts::AsSlice, + inputs::{BytesInput, GeneralizedInput, HasTargetBytes, Input}, + }; + + #[test] + fn deserialize_generalised_to_bytes() { + let generalised = GeneralizedInput::new(b"hello".to_vec()); + let mut buf = Vec::new(); + generalised.serialize_dynamic(&mut buf).unwrap(); + let bytes = BytesInput::deserialize_dynamic(&buf).unwrap().unwrap(); + assert_eq!(bytes.target_bytes().as_slice(), b"hello"); + } +} diff --git a/libafl/src/inputs/encoded.rs b/libafl/src/inputs/encoded.rs index b73bb8cd3f..4c63acf61b 100644 --- a/libafl/src/inputs/encoded.rs +++ b/libafl/src/inputs/encoded.rs @@ -196,6 +196,8 @@ pub struct EncodedInput { } impl Input for EncodedInput { + const NAME: &'static str = "EncodedInput"; + /// Generate a name for this input #[must_use] fn generate_name(&self, _idx: usize) -> String { diff --git a/libafl/src/inputs/generalized.rs b/libafl/src/inputs/generalized.rs index a32040b9c2..23fed59d6f 100644 --- a/libafl/src/inputs/generalized.rs +++ b/libafl/src/inputs/generalized.rs @@ -35,6 +35,8 @@ pub struct GeneralizedInput { } impl Input for GeneralizedInput { + const NAME: &'static str = "GeneralizedInput"; + /// Generate a name for this input fn generate_name(&self, _idx: usize) -> String { let mut hasher = AHasher::new_with_keys(0, 0); diff --git a/libafl/src/inputs/gramatron.rs b/libafl/src/inputs/gramatron.rs index a6c73085e1..332379cdf0 100644 --- a/libafl/src/inputs/gramatron.rs +++ b/libafl/src/inputs/gramatron.rs @@ -38,6 +38,8 @@ pub struct GramatronInput { } impl Input for GramatronInput { + const NAME: &'static str = "GramatronInput"; + /// Generate a name for this input #[must_use] fn generate_name(&self, _idx: usize) -> String { diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index aae307ea5f..5b84b44a7a 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -15,15 +15,21 @@ pub use generalized::*; #[cfg(feature = "nautilus")] pub mod nautilus; use alloc::{ + boxed::Box, string::{String, ToString}, vec::Vec, }; -use core::{clone::Clone, fmt::Debug}; +use core::{ + clone::Clone, + fmt::{Debug, Formatter}, +}; #[cfg(feature = "std")] use std::{fs::File, hash::Hash, io::Read, path::Path}; +use downcast_rs::{impl_downcast, Downcast}; #[cfg(feature = "nautilus")] pub use nautilus::*; +use postcard::{de_flavors::Slice, Deserializer}; use serde::{Deserialize, Serialize}; #[cfg(feature = "std")] @@ -33,6 +39,9 @@ use crate::{bolts::ownedref::OwnedSlice, Error}; /// An input for the target #[cfg(not(feature = "std"))] pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { + /// Name for this input type + const NAME: &'static str; + /// Write this input to the file fn to_file

(&self, _path: P) -> Result<(), Error> { Err(Error::not_implemented("Not supported in no_std")) @@ -52,7 +61,12 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { /// An input for the target #[cfg(feature = "std")] -pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { +pub trait Input: + Clone + ConvertibleInput + Serialize + serde::de::DeserializeOwned + Debug +{ + /// Name for this input type + const NAME: &'static str; + /// Write this input to the file fn to_file

(&self, path: P) -> Result<(), Error> where @@ -72,6 +86,20 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { Ok(postcard::from_bytes(&bytes)?) } + /// Serializes this input to the dynamic serialisation format to pass between different fuzzers + fn serialize_dynamic(&self, buf: &mut Vec) -> Result<(), postcard::Error> { + buf.extend_from_slice(postcard::to_allocvec(Self::NAME)?.as_slice()); + buf.extend_from_slice(postcard::to_allocvec(self)?.as_slice()); + Ok(()) + } + + /// Deserializes this input type from the dynamic serialization format, if possible + fn deserialize_dynamic( + buf: &[u8], + ) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> { + convert_named(buf) + } + /// Generate a name for this input fn generate_name(&self, idx: usize) -> String; @@ -79,10 +107,82 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { fn wrapped_as_testcase(&mut self) {} } +/// Utility trait for downcasting inputs for conversion +#[cfg(feature = "std")] +pub trait ConvertibleInput: Downcast {} + +#[cfg(feature = "std")] +impl_downcast!(ConvertibleInput); + +#[cfg(feature = "std")] +impl ConvertibleInput for I {} + +/// Function signature for conversion methods +#[cfg(feature = "std")] +pub type InputConversionFn = fn( + &[u8], +) -> Result< + Box, + <&mut Deserializer as serde::de::Deserializer>::Error, +>; + +/// Struct for converting between input types at deserialisation time +#[cfg(feature = "std")] +pub struct InputConversion { + from: &'static str, + to: &'static str, + converter: InputConversionFn, +} + +#[cfg(feature = "std")] +impl Debug for InputConversion { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("InputConversion") + .field("from", &self.from) + .field("to", &self.to) + .finish() + } +} + +#[cfg(feature = "std")] +impl InputConversion { + /// Create a new input conversion to be registered + pub const fn new(from: &'static str, to: &'static str, converter: InputConversionFn) -> Self { + Self { + from, + to, + converter, + } + } +} + +#[cfg(feature = "std")] +inventory::collect!(InputConversion); + +/// Converts from a serialisation-specified type to the intended type, if such a conversion exists +#[cfg(feature = "std")] +pub fn convert_named( + bytes: &[u8], +) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> { + let mut deser = Deserializer::from_bytes(bytes); + let from = String::deserialize(&mut deser)?; + for conversion in inventory::iter:: { + if conversion.from == from && conversion.to == T::NAME { + return Ok((conversion.converter)(deser.finalize()?)? + .downcast() + .ok() + .map(|boxed| *boxed)); + } + } + Ok(None) +} + /// An input for tests, mainly. There is no real use much else. #[derive(Copy, Clone, Serialize, Deserialize, Debug, Hash)] pub struct NopInput {} impl Input for NopInput { + const NAME: &'static str = "NopInput"; + fn generate_name(&self, _idx: usize) -> String { "nop-input".to_string() } From d3ba8aecd75bb52e41a89bf59b2e2fa83b69723d Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 24 Oct 2022 15:52:02 +0200 Subject: [PATCH 2/5] add failing case --- libafl/src/inputs/bytes.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index 255c37d4b3..88bb893beb 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -135,7 +135,7 @@ mod test { use crate::{ bolts::AsSlice, - inputs::{BytesInput, GeneralizedInput, HasTargetBytes, Input}, + inputs::{BytesInput, GeneralizedInput, HasTargetBytes, Input, NopInput}, }; #[test] @@ -146,4 +146,15 @@ mod test { let bytes = BytesInput::deserialize_dynamic(&buf).unwrap().unwrap(); assert_eq!(bytes.target_bytes().as_slice(), b"hello"); } + + #[test] + fn failed_deserialize_from_nop() { + // note that NopInput implements HasTargetBytes, but because we have not submitted the + // conversion BytesInput cannot be converted from NopInput + + let nop = NopInput {}; + let mut buf = Vec::new(); + nop.serialize_dynamic(&mut buf).unwrap(); + assert!(BytesInput::deserialize_dynamic(&buf).unwrap().is_none()); + } } From e3f6098da3558bbe5a313d65e828b2e95093772d Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 24 Oct 2022 16:05:55 +0200 Subject: [PATCH 3/5] fix box import --- libafl/src/inputs/bytes.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index 88bb893beb..b57d9c2a85 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -1,9 +1,8 @@ //! The `BytesInput` is the "normal" input, a map of bytes, that can be sent directly to the client //! (As opposed to other, more abstract, inputs, like an Grammar-Based AST Input) -use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec}; +use alloc::{borrow::ToOwned, boxed::Box, rc::Rc, string::String, vec::Vec}; use core::{cell::RefCell, convert::From, hash::Hasher}; -use std::prelude::rust_2015::Box; #[cfg(feature = "std")] use std::{fs::File, io::Read, path::Path}; From d3521e16287fd02e02983389bdc96a359aefaa5f Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Mon, 24 Oct 2022 17:20:02 +0200 Subject: [PATCH 4/5] input_conversion feature --- libafl/Cargo.toml | 3 ++- libafl/src/bolts/serdeany.rs | 4 ++-- libafl/src/inputs/bytes.rs | 10 ++++++---- libafl/src/inputs/mod.rs | 19 ++++++++++--------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 3a1098d647..5e34a147a0 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -13,8 +13,9 @@ categories = ["development-tools::testing", "emulators", "embedded", "os", "no-s [features] default = ["std", "derive", "llmp_compression", "rand_trait", "fork", "prelude"] -std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds", "inventory", "downcast-rs"] # print, env, launcher ... support +std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds", "input_conversion"] # print, env, launcher ... support derive = ["libafl_derive"] # provide derive(SerdeAny) macro. +input_conversion = ["inventory", "downcast-rs", "ctor"] fork = [] # uses the fork() syscall to spawn children, instead of launching a new command, if supported by the OS (has no effect on Windows, no_std). rand_trait = ["rand_core"] # If set, libafl's rand implementations will implement `rand::Rng` introspection = [] # Include performance statistics of the fuzzing pipeline diff --git a/libafl/src/bolts/serdeany.rs b/libafl/src/bolts/serdeany.rs index 60d6bee70a..1c717fec73 100644 --- a/libafl/src/bolts/serdeany.rs +++ b/libafl/src/bolts/serdeany.rs @@ -605,7 +605,7 @@ create_serde_registry_for_trait!(serdeany_registry, crate::bolts::serdeany::Serd pub use serdeany_registry::*; /// Register a `SerdeAny` type in the [`RegistryBuilder`] -#[cfg(feature = "std")] +#[cfg(feature = "ctor")] #[macro_export] macro_rules! register_at_startup { ($struct_type:ty) => { @@ -619,7 +619,7 @@ macro_rules! register_at_startup { } /// Do nothing for `no_std`, you have to register it manually in `main()` with [`RegistryBuilder::register`] -#[cfg(not(feature = "std"))] +#[cfg(not(feature = "ctor"))] #[macro_export] macro_rules! register_at_startup { ($struct_type:ty) => {}; diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index b57d9c2a85..224243d0b0 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -7,12 +7,14 @@ use core::{cell::RefCell, convert::From, hash::Hasher}; use std::{fs::File, io::Read, path::Path}; use ahash::AHasher; -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] use postcard::{de_flavors::Slice, Deserializer}; use serde::{Deserialize, Serialize}; +#[cfg(feature = "input_conversion")] +use crate::inputs::ConvertibleInput; #[cfg(feature = "std")] -use crate::{bolts::fs::write_file_atomic, bolts::AsSlice, inputs::ConvertibleInput, Error}; +use crate::{bolts::fs::write_file_atomic, bolts::AsSlice, Error}; use crate::{ bolts::{ownedref::OwnedSlice, HasLen}, inputs::{HasBytesVec, HasTargetBytes, Input}, @@ -58,7 +60,7 @@ impl Input for BytesInput { } /// Dynamic deserialisation of any input type that has target bytes -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] pub fn target_bytes_to_bytes Deserialize<'a>>( buf: &[u8], ) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> @@ -69,7 +71,7 @@ pub fn target_bytes_to_bytes Deserialize<'a>>( })) } -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] inventory::submit! { use crate::inputs::{GeneralizedInput, InputConversion}; InputConversion::new(GeneralizedInput::NAME, BytesInput::NAME, target_bytes_to_bytes::) diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index 5b84b44a7a..9fb942c5d2 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -26,6 +26,7 @@ use core::{ #[cfg(feature = "std")] use std::{fs::File, hash::Hash, io::Read, path::Path}; +#[cfg(feature = "input_conversion")] use downcast_rs::{impl_downcast, Downcast}; #[cfg(feature = "nautilus")] pub use nautilus::*; @@ -108,17 +109,17 @@ pub trait Input: } /// Utility trait for downcasting inputs for conversion -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] pub trait ConvertibleInput: Downcast {} -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] impl_downcast!(ConvertibleInput); -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] impl ConvertibleInput for I {} /// Function signature for conversion methods -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] pub type InputConversionFn = fn( &[u8], ) -> Result< @@ -127,14 +128,14 @@ pub type InputConversionFn = fn( >; /// Struct for converting between input types at deserialisation time -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] pub struct InputConversion { from: &'static str, to: &'static str, converter: InputConversionFn, } -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] impl Debug for InputConversion { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { f.debug_struct("InputConversion") @@ -144,7 +145,7 @@ impl Debug for InputConversion { } } -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] impl InputConversion { /// Create a new input conversion to be registered pub const fn new(from: &'static str, to: &'static str, converter: InputConversionFn) -> Self { @@ -156,11 +157,11 @@ impl InputConversion { } } -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] inventory::collect!(InputConversion); /// Converts from a serialisation-specified type to the intended type, if such a conversion exists -#[cfg(feature = "std")] +#[cfg(feature = "input_conversion")] pub fn convert_named( bytes: &[u8], ) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> { From 7155437772074660804587eae49af5d615711f06 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 24 Oct 2022 17:52:48 +0200 Subject: [PATCH 5/5] prevent the need for a self-translation if from is to --- libafl/src/inputs/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index 9fb942c5d2..955035fe63 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -167,6 +167,9 @@ pub fn convert_named( ) -> Result, <&mut Deserializer as serde::de::Deserializer>::Error> { let mut deser = Deserializer::from_bytes(bytes); let from = String::deserialize(&mut deser)?; + if from == T::NAME { + return Ok(Some(T::deserialize(&mut deser)?)); + } for conversion in inventory::iter:: { if conversion.from == from && conversion.to == T::NAME { return Ok((conversion.converter)(deser.finalize()?)?