Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic deserialization proposal #861

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion libafl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ categories = ["development-tools::testing", "emulators", "embedded", "os", "no-s

[features]
default = ["std", "derive", "llmp_compression", "rand_trait", "fork", "prelude"]
std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds"] # print, env, launcher ... support
std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds", "input_conversion"] # print, env, launcher ... support
derive = ["libafl_derive"] # provide derive(SerdeAny) macro.
input_conversion = ["inventory", "downcast-rs", "ctor"]
fork = [] # uses the fork() syscall to spawn children, instead of launching a new command, if supported by the OS (has no effect on Windows, no_std).
rand_trait = ["rand_core"] # If set, libafl's rand implementations will implement `rand::Rng`
introspection = [] # Include performance statistics of the fuzzing pipeline
Expand Down Expand Up @@ -94,6 +95,9 @@ z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for
pyo3 = { version = "0.17", optional = true, features = ["serde", "macros"] }
concat-idents = { version = "1.1.3", optional = true }

inventory = { version = "0.3.2", optional = true }
downcast-rs = { version = "1.2.0", optional = true }

# AGPL
# !!! this crate requires nightly
grammartec = { version = "0.2", optional = true }
Expand Down
4 changes: 2 additions & 2 deletions libafl/src/bolts/serdeany.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ create_serde_registry_for_trait!(serdeany_registry, crate::bolts::serdeany::Serd
pub use serdeany_registry::*;

/// Register a `SerdeAny` type in the [`RegistryBuilder`]
#[cfg(feature = "std")]
#[cfg(feature = "ctor")]
#[macro_export]
macro_rules! register_at_startup {
($struct_type:ty) => {
Expand All @@ -619,7 +619,7 @@ macro_rules! register_at_startup {
}

/// Do nothing for `no_std`, you have to register it manually in `main()` with [`RegistryBuilder::register`]
#[cfg(not(feature = "std"))]
#[cfg(not(feature = "ctor"))]
#[macro_export]
macro_rules! register_at_startup {
($struct_type:ty) => {};
Expand Down
58 changes: 56 additions & 2 deletions libafl/src/inputs/bytes.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
//! The `BytesInput` is the "normal" input, a map of bytes, that can be sent directly to the client
//! (As opposed to other, more abstract, inputs, like a Grammar-Based AST Input)

use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec};
use alloc::{borrow::ToOwned, boxed::Box, rc::Rc, string::String, vec::Vec};
use core::{cell::RefCell, convert::From, hash::Hasher};
#[cfg(feature = "std")]
use std::{fs::File, io::Read, path::Path};

use ahash::AHasher;
#[cfg(feature = "input_conversion")]
use postcard::{de_flavors::Slice, Deserializer};
use serde::{Deserialize, Serialize};

#[cfg(feature = "input_conversion")]
use crate::inputs::ConvertibleInput;
#[cfg(feature = "std")]
use crate::{bolts::fs::write_file_atomic, Error};
use crate::{bolts::fs::write_file_atomic, bolts::AsSlice, Error};
use crate::{
bolts::{ownedref::OwnedSlice, HasLen},
inputs::{HasBytesVec, HasTargetBytes, Input},
Expand All @@ -24,6 +28,8 @@ pub struct BytesInput {
}

impl Input for BytesInput {
const NAME: &'static str = "BytesInput";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think here type_name can be used instead of an associated const https://doc.rust-lang.org/std/any/fn.type_name.html

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, awesome -- didn't know about that. I'll patch it out.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, this needs to be an associated const because type_name is not const stable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will be stabilized soon, see rust-lang/rust#63084

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So keep the const for now but put a comment to remind us to switch to type_name once it is stable

Copy link
Member

@domenukk domenukk Oct 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exact contents and format of the string returned are not specified [...] amongst the strings that type_name::<Option<String>>() might return are "Option<String>" and "std::option::Option<std::string::String>".

Could be an issue, probably a NAME field is safer

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it should be called TYPE; otherwise it's confusing with our Named trait(?)


#[cfg(feature = "std")]
/// Write this input to the file
fn to_file<P>(&self, path: P) -> Result<(), Error>
Expand Down Expand Up @@ -53,6 +59,24 @@ impl Input for BytesInput {
}
}

/// Dynamically deserialise any input type that exposes target bytes into a [`BytesInput`]
///
/// `buf` is decoded with postcard as an `I`; the target bytes of that value are then copied
/// into a freshly boxed [`BytesInput`], returned as a `dyn ConvertibleInput`.
///
/// # Errors
///
/// Propagates the postcard error if `buf` does not decode as an `I`.
#[cfg(feature = "input_conversion")]
pub fn target_bytes_to_bytes<I: HasTargetBytes + for<'a> Deserialize<'a>>(
    buf: &[u8],
) -> Result<Box<dyn ConvertibleInput>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error>
{
    let source: I = postcard::from_bytes(buf)?;
    // Keep the target-bytes view alive in a named binding while it is copied out.
    let target = source.target_bytes();
    let bytes = target.as_slice().to_vec();
    Ok(Box::new(BytesInput { bytes }))
}

// Submit an `InputConversion` to the `inventory` collection: its source name is
// `GeneralizedInput::NAME`, its target name is `BytesInput::NAME`, and the converter is
// `target_bytes_to_bytes` instantiated for `GeneralizedInput`.
#[cfg(feature = "input_conversion")]
inventory::submit! {
use crate::inputs::{GeneralizedInput, InputConversion};
InputConversion::new(GeneralizedInput::NAME, BytesInput::NAME, target_bytes_to_bytes::<GeneralizedInput>)
}

/// Rc Ref-cell from Input
impl From<BytesInput> for Rc<RefCell<BytesInput>> {
fn from(input: BytesInput) -> Self {
Expand Down Expand Up @@ -105,3 +129,33 @@ impl BytesInput {
Self { bytes }
}
}

#[cfg(test)]
mod test {
use alloc::vec::Vec;

use crate::{
bolts::AsSlice,
inputs::{BytesInput, GeneralizedInput, HasTargetBytes, Input, NopInput},
};

#[test]
fn deserialize_generalised_to_bytes() {
    // A `GeneralizedInput` goes onto the wire; a `BytesInput` carrying the same target
    // bytes is expected to come back out through the registered conversion.
    let mut wire = Vec::new();
    GeneralizedInput::new(b"hello".to_vec())
        .serialize_dynamic(&mut wire)
        .unwrap();

    let restored = BytesInput::deserialize_dynamic(&wire).unwrap().unwrap();
    assert_eq!(restored.target_bytes().as_slice(), b"hello");
}
Comment on lines +143 to +149
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example usage of the dynamic deserialisation. #858


#[test]
fn failed_deserialize_from_nop() {
    // `NopInput` does implement `HasTargetBytes`, yet no NopInput -> BytesInput conversion
    // has been submitted, so asking for a `BytesInput` must yield `None` rather than a value.
    let mut wire = Vec::new();
    NopInput {}.serialize_dynamic(&mut wire).unwrap();

    let outcome = BytesInput::deserialize_dynamic(&wire).unwrap();
    assert!(outcome.is_none());
}
Comment on lines +152 to +160
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comments here: if the deserialisation for this type cannot occur, it is skipped and not deserialised.

}
2 changes: 2 additions & 0 deletions libafl/src/inputs/encoded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ pub struct EncodedInput {
}

impl Input for EncodedInput {
const NAME: &'static str = "EncodedInput";

/// Generate a name for this input
#[must_use]
fn generate_name(&self, _idx: usize) -> String {
Expand Down
2 changes: 2 additions & 0 deletions libafl/src/inputs/generalized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ pub struct GeneralizedInput {
}

impl Input for GeneralizedInput {
const NAME: &'static str = "GeneralizedInput";

/// Generate a name for this input
fn generate_name(&self, _idx: usize) -> String {
let mut hasher = AHasher::new_with_keys(0, 0);
Expand Down
2 changes: 2 additions & 0 deletions libafl/src/inputs/gramatron.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ pub struct GramatronInput {
}

impl Input for GramatronInput {
const NAME: &'static str = "GramatronInput";

/// Generate a name for this input
#[must_use]
fn generate_name(&self, _idx: usize) -> String {
Expand Down
108 changes: 106 additions & 2 deletions libafl/src/inputs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,22 @@ pub use generalized::*;
#[cfg(feature = "nautilus")]
pub mod nautilus;
use alloc::{
boxed::Box,
string::{String, ToString},
vec::Vec,
};
use core::{clone::Clone, fmt::Debug};
use core::{
clone::Clone,
fmt::{Debug, Formatter},
};
#[cfg(feature = "std")]
use std::{fs::File, hash::Hash, io::Read, path::Path};

#[cfg(feature = "input_conversion")]
use downcast_rs::{impl_downcast, Downcast};
#[cfg(feature = "nautilus")]
pub use nautilus::*;
use postcard::{de_flavors::Slice, Deserializer};
use serde::{Deserialize, Serialize};

#[cfg(feature = "std")]
Expand All @@ -33,6 +40,9 @@ use crate::{bolts::ownedref::OwnedSlice, Error};
/// An input for the target
#[cfg(not(feature = "std"))]
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
/// Name for this input type
const NAME: &'static str;

/// Write this input to the file
fn to_file<P>(&self, _path: P) -> Result<(), Error> {
Err(Error::not_implemented("Not supported in no_std"))
Expand All @@ -52,7 +62,12 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {

/// An input for the target
#[cfg(feature = "std")]
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
pub trait Input:
Clone + ConvertibleInput + Serialize + serde::de::DeserializeOwned + Debug
{
/// Name for this input type
const NAME: &'static str;

/// Write this input to the file
fn to_file<P>(&self, path: P) -> Result<(), Error>
where
Expand All @@ -72,17 +87,106 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
Ok(postcard::from_bytes(&bytes)?)
}

/// Serializes this input to the dynamic serialisation format to pass between different fuzzers
///
/// The format is the postcard encoding of [`Self::NAME`] immediately followed by the
/// postcard encoding of `self`. Both parts are appended to `buf`; existing contents of
/// `buf` are kept.
///
/// # Errors
///
/// Returns the postcard error if either part cannot be encoded. In that case `buf` is
/// left exactly as it was passed in.
fn serialize_dynamic(&self, buf: &mut Vec<u8>) -> Result<(), postcard::Error> {
    // Encode both parts before touching `buf`, so a failure while encoding `self`
    // cannot leave a dangling type-name prefix behind in the caller's buffer.
    let name = postcard::to_allocvec(Self::NAME)?;
    let payload = postcard::to_allocvec(self)?;

    // One reservation for both parts instead of growing twice.
    buf.reserve(name.len() + payload.len());
    buf.extend_from_slice(name.as_slice());
    buf.extend_from_slice(payload.as_slice());
    Ok(())
}

/// Attempts to read a value of this type back out of the dynamic serialization format
///
/// This simply forwards to [`convert_named`] for `Self`. `Ok(None)` means the data was
/// readable but could not be turned into this type.
fn deserialize_dynamic(
    buf: &[u8],
) -> Result<Option<Self>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error> {
    convert_named::<Self>(buf)
}
Comment on lines +91 to +102
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two methods can be overridden, so it's possible to serialize context and pass it at the end of buf here; just append the context to the end of the message. If the target already has it, it can opt to simply not deserialize it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how big context is, but if it's reasonably sized this could be effective.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

context doesn't need to be serialized, but it is needed to do the serialization as it contains some info to convert the AST to bytes

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does that work with LLMP now? I don't see its unparse getting invoked except in specific places.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't. To serialize with serde there is no need of it, while it is needed to convert the AST stored in the input to bytes. You see it only in the harness.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also EncodedInput has something similar, as it is an array of u32 id representing tokens and the mapping id -> token is ofc not stored in the input itself so to convert to bytes you need to call https://github.com/AFLplusplus/LibAFL/blob/main/libafl/src/inputs/encoded.rs#L75

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the conversion would have to happen at the sending client, not the receiving?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I see why it is convenient to do in the receiver, we can reuse the NewTestcase event and avoid double memory usage

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to think about it, but I don't see other solutions, the tokens map in the EncodedInput decoder can be large even gigabytes when using a huge initial corpus


/// Generate a name for this input
fn generate_name(&self, idx: usize) -> String;

/// A hook executed if the input is stored as `Testcase`
fn wrapped_as_testcase(&mut self) {}
}

/// Utility trait for downcasting inputs for conversion
///
/// Conversion functions return a `Box<dyn ConvertibleInput>`; the `Downcast` supertrait is
/// what allows `convert_named` to turn that box back into the concrete input type.
#[cfg(feature = "input_conversion")]
pub trait ConvertibleInput: Downcast {}

// Generates the downcasting helpers for `dyn ConvertibleInput` (macro from `downcast-rs`).
#[cfg(feature = "input_conversion")]
impl_downcast!(ConvertibleInput);

// Blanket implementation: every type implementing `Input` is a `ConvertibleInput`.
#[cfg(feature = "input_conversion")]
impl<I: Input> ConvertibleInput for I {}

/// Function signature for conversion methods
///
/// A converter receives a byte slice and returns either a boxed [`ConvertibleInput`] or the
/// error type of the postcard [`Deserializer`] over a [`Slice`]. When invoked from
/// `convert_named`, the slice is what remains of the message after the leading type name
/// has been read.
#[cfg(feature = "input_conversion")]
pub type InputConversionFn = fn(
&[u8],
) -> Result<
Box<dyn ConvertibleInput>,
<&mut Deserializer<Slice> as serde::de::Deserializer>::Error,
>;

/// Struct for converting between input types at deserialisation time
///
/// Values of this type are gathered through `inventory::collect!` and scanned by
/// `convert_named`, which picks the entry whose `from` equals the type name found in the
/// serialised data and whose `to` equals the requested type's `NAME`.
#[cfg(feature = "input_conversion")]
pub struct InputConversion {
/// Name of the input type the serialised data was produced from
from: &'static str,
/// Name of the input type the converter is expected to produce
to: &'static str,
/// Function performing the conversion on the serialised bytes
converter: InputConversionFn,
}

#[cfg(feature = "input_conversion")]
impl Debug for InputConversion {
    /// Formats the conversion as a struct showing `from` and `to`; the function pointer
    /// stored in `converter` is deliberately left out of the output.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let mut repr = f.debug_struct("InputConversion");
        repr.field("from", &self.from);
        repr.field("to", &self.to);
        repr.finish()
    }
}

#[cfg(feature = "input_conversion")]
impl InputConversion {
    /// Create a new input conversion to be registered
    ///
    /// * `from` - name of the input type the serialised data was produced from
    /// * `to` - name of the input type the converter is expected to produce
    /// * `converter` - function that turns the serialised bytes into the target input
    ///
    /// The value does nothing by itself; it only takes effect once it has been submitted
    /// to the `inventory` collection, hence the `#[must_use]`.
    #[must_use]
    pub const fn new(from: &'static str, to: &'static str, converter: InputConversionFn) -> Self {
        Self {
            from,
            to,
            converter,
        }
    }
}

#[cfg(feature = "input_conversion")]
inventory::collect!(InputConversion);

/// Decodes data in the dynamic serialisation format into a `T`, converting when needed
///
/// The leading type name is read first. If it equals `T::NAME` the rest of the data is
/// decoded as `T` directly. Otherwise the submitted [`InputConversion`]s are searched for
/// one going from that name to `T::NAME`, and the first match is applied to the remaining
/// bytes.
///
/// Returns `Ok(None)` when no matching conversion exists, or when the converter's result
/// cannot be downcast to `T`.
///
/// # Errors
///
/// Propagates any postcard error raised while reading the name, decoding `T`, or running
/// the converter.
#[cfg(feature = "input_conversion")]
pub fn convert_named<T: Input>(
    bytes: &[u8],
) -> Result<Option<T>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error> {
    let mut deser = Deserializer::from_bytes(bytes);
    let source_name = String::deserialize(&mut deser)?;

    // Same type on both ends: no conversion required.
    if source_name == T::NAME {
        let value = T::deserialize(&mut deser)?;
        return Ok(Some(value));
    }

    for candidate in inventory::iter::<InputConversion> {
        if candidate.from != source_name || candidate.to != T::NAME {
            continue;
        }
        // First matching conversion wins; hand it whatever follows the type name.
        let remainder = deser.finalize()?;
        let converted = (candidate.converter)(remainder)?;
        return Ok(converted.downcast::<T>().ok().map(|boxed| *boxed));
    }

    Ok(None)
}

/// An input mainly for tests. It has no real use otherwise.
#[derive(Copy, Clone, Serialize, Deserialize, Debug, Hash)]
pub struct NopInput {}
impl Input for NopInput {
const NAME: &'static str = "NopInput";

fn generate_name(&self, _idx: usize) -> String {
"nop-input".to_string()
}
Expand Down