Skip to content

Commit

Permalink
REVM fuzz dictionary (#985)
Browse files Browse the repository at this point in the history
* feat: fuzz dictionary

Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com>

* fix: handle malformed bytecode

* fix: limit search for push bytes

* feat: collect fuzz state from logs

* feat: build initial fuzz state from db

* perf: use `Index` instead of `Selector`

Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com>
  • Loading branch information
onbjerg and brockelmore committed Mar 22, 2022
1 parent b672c91 commit 2837663
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 19 deletions.
15 changes: 9 additions & 6 deletions evm/src/executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ use revm::{
};
use std::collections::BTreeMap;

/// A mapping of addresses to their changed state.
pub type StateChangeset = HashMap<Address, Account>;

#[derive(thiserror::Error, Debug)]
pub enum EvmError {
/// Error which occurred during execution of a transaction
Expand All @@ -54,7 +57,7 @@ pub enum EvmError {
traces: Option<CallTraceArena>,
debug: Option<DebugArena>,
labels: BTreeMap<Address, String>,
state_changeset: HashMap<Address, Account>,
state_changeset: StateChangeset,
},
/// Error which occurred during ABI encoding/decoding
#[error(transparent)]
Expand Down Expand Up @@ -102,7 +105,7 @@ pub struct CallResult<D: Detokenize> {
///
/// This is only present if the changed state was not committed to the database (i.e. if you
/// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
pub state_changeset: HashMap<Address, Account>,
pub state_changeset: StateChangeset,
}

/// The result of a raw call.
Expand Down Expand Up @@ -130,7 +133,7 @@ pub struct RawCallResult {
///
/// This is only present if the changed state was not committed to the database (i.e. if you
/// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
pub state_changeset: HashMap<Address, Account>,
pub state_changeset: StateChangeset,
}

impl Default for RawCallResult {
Expand All @@ -145,7 +148,7 @@ impl Default for RawCallResult {
labels: BTreeMap::new(),
traces: None,
debug: None,
state_changeset: HashMap::new(),
state_changeset: StateChangeset::new(),
}
}
}
Expand All @@ -159,7 +162,7 @@ pub struct Executor<DB: DatabaseRef> {
// Also, if we stored the VM here we would still need to
// take `&mut self` when we are not committing to the database, since
// we need to set `evm.env`.
db: CacheDB<DB>,
pub(crate) db: CacheDB<DB>,
env: Env,
inspector_config: InspectorStackConfig,
}
Expand Down Expand Up @@ -411,7 +414,7 @@ where
&self,
address: Address,
reverted: bool,
state_changeset: HashMap<Address, Account>,
state_changeset: StateChangeset,
should_fail: bool,
) -> bool {
// Construct a new VM with the state changeset
Expand Down
19 changes: 16 additions & 3 deletions evm/src/fuzz/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ use proptest::test_runner::{TestCaseError, TestError, TestRunner};
use revm::db::DatabaseRef;
use serde::{Deserialize, Serialize};
use std::{cell::RefCell, collections::BTreeMap, fmt};
use strategies::fuzz_calldata;
use strategies::{
build_initial_state, collect_state_from_call, fuzz_calldata, fuzz_calldata_from_state,
EvmFuzzState,
};

/// Magic return code for the `assume` cheatcode
pub const ASSUME_MAGIC_RETURN_CODE: &[u8] = "FOUNDRY::ASSUME".as_bytes();
Expand Down Expand Up @@ -54,14 +57,21 @@ where
should_fail: bool,
errors: Option<&Abi>,
) -> FuzzTestResult {
let strat = fuzz_calldata(func);

// Stores the consumed gas and calldata of every successful fuzz call
let cases: RefCell<Vec<FuzzCase>> = RefCell::new(Default::default());

// Stores the result of the last call
let call: RefCell<RawCallResult> = RefCell::new(Default::default());

// Stores fuzz state for use with [fuzz_calldata_from_state]
let state: EvmFuzzState = build_initial_state(&self.executor.db);

// TODO: We should have a `FuzzerOpts` struct where we can configure the fuzzer. When we
// have that, we should add a way to configure strategy weights
let strat = proptest::strategy::Union::new_weighted(vec![
(60, fuzz_calldata(func.clone())),
(40, fuzz_calldata_from_state(func.clone(), state.clone())),
]);
tracing::debug!(func = ?func.name, should_fail, "fuzzing");
let run_result = self.runner.clone().run(&strat, |calldata| {
*call.borrow_mut() = self
Expand All @@ -70,6 +80,9 @@ where
.expect("could not make raw evm call");
let call = call.borrow();

// Build fuzzer state
collect_state_from_call(&call.logs, &call.state_changeset, state.clone());

// When assume cheat code is triggered return a special string "FOUNDRY::ASSUME"
if call.result.as_ref() == ASSUME_MAGIC_RETURN_CODE {
return Err(TestCaseError::reject("ASSUME: Too many rejects"))
Expand Down
16 changes: 9 additions & 7 deletions evm/src/fuzz/strategies/calldata.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use super::fuzz_param;
use ethers::{abi::Function, types::Bytes};
use proptest::prelude::Strategy;
use proptest::prelude::{BoxedStrategy, Strategy};

/// Given a function, it returns a proptest strategy which generates valid abi-encoded calldata
/// Given a function, it returns a strategy which generates valid calldata
/// for that function's input types.
pub fn fuzz_calldata(func: &Function) -> impl Strategy<Value = Bytes> + '_ {
pub fn fuzz_calldata(func: Function) -> BoxedStrategy<Bytes> {
// We need to compose all the strategies generated for each parameter in all
// possible combinations
let strats = func.inputs.iter().map(|input| fuzz_param(&input.kind)).collect::<Vec<_>>();

strats.prop_map(move |tokens| {
tracing::trace!(input = ?tokens);
func.encode_input(&tokens).unwrap().into()
})
strats
.prop_map(move |tokens| {
tracing::trace!(input = ?tokens);
func.encode_input(&tokens).unwrap().into()
})
.boxed()
}
7 changes: 6 additions & 1 deletion evm/src/fuzz/strategies/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ mod uint;
pub use uint::UintStrategy;

mod param;
pub use param::fuzz_param;
pub use param::{fuzz_param, fuzz_param_from_state};

mod calldata;
pub use calldata::fuzz_calldata;

mod state;
pub use state::{
build_initial_state, collect_state_from_call, fuzz_calldata_from_state, EvmFuzzState,
};
81 changes: 79 additions & 2 deletions evm/src/fuzz/strategies/param.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ use ethers::{
};
use proptest::prelude::*;

use super::state::EvmFuzzState;

/// The max length of arrays we fuzz for is 256.
pub const MAX_ARRAY_LEN: usize = 256;

/// Given an ethabi parameter type, returns a proptest strategy for generating values for that
/// datatype. Works with ABI Encoder v2 tuples.
/// Given a parameter type, returns a strategy for generating values for that type.
///
/// Works with ABI Encoder v2 tuples.
pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
match param {
ParamType::Address => {
Expand Down Expand Up @@ -63,3 +66,77 @@ pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
}
}
}

/// Given a parameter type, returns a strategy for generating values for that type, given some EVM
/// fuzz state.
///
/// Every generated leaf value is derived from one 32 byte word picked from `state`. Words are
/// interpreted as right-aligned (big-endian) EVM words: addresses use the last 20 bytes, integers
/// are read big-endian and reduced to the requested width, booleans look at the last byte and
/// fixed bytes take the trailing `size` bytes.
///
/// The dictionary is sampled lazily, each time a value is generated, so words added to `state`
/// while the fuzzer is running become candidates as well. If the dictionary is empty the zero word
/// is used instead.
///
/// Works with ABI Encoder v2 tuples.
///
/// # Panics
///
/// Panics if `param` is an `int`/`uint` whose width in bytes is not in `1..=32`.
pub fn fuzz_param_from_state(param: &ParamType, state: EvmFuzzState) -> BoxedStrategy<Token> {
    // The strategy has to own its handle to the shared dictionary to comply with lifetime
    // requirements.
    let dictionary = state.clone();

    // Select a value from the state. The length is read at generation time (not at construction
    // time) so that the index always matches the current size of the dictionary.
    let value = any::<prop::sample::Index>().prop_map(move |index| {
        let words = dictionary.borrow();
        if words.is_empty() {
            // `Index::index` panics on a size of zero, so fall back to the zero word.
            return [0u8; 32]
        }
        let picked = words.iter().nth(index.index(words.len())).copied();
        picked.expect("sampled index is always within the dictionary")
    });

    // Convert the value based on the parameter type
    match param {
        ParamType::Address => {
            value.prop_map(move |value| Address::from_slice(&value[12..]).into_token()).boxed()
        }
        ParamType::Bytes => value.prop_map(move |value| Bytes::from(value).into_token()).boxed(),
        ParamType::Int(n) => match n / 8 {
            32 => {
                value.prop_map(move |value| I256::from_raw(U256::from(value)).into_token()).boxed()
            }
            y @ 1..=31 => value
                .prop_map(move |value| {
                    // Generate a uintN in the correct range, then shift it to the range of intN
                    // by subtracting 2^(N-1)
                    let uint = U256::from(value) % U256::from(2usize).pow(U256::from(y * 8));
                    let max_int_plus1 = U256::from(2usize).pow(U256::from(y * 8 - 1));
                    let num = I256::from_raw(uint.overflowing_sub(max_int_plus1).0);
                    num.into_token()
                })
                .boxed(),
            _ => panic!("unsupported solidity type int{}", n),
        },
        ParamType::Uint(n) => match n / 8 {
            32 => value.prop_map(move |value| U256::from(value).into_token()).boxed(),
            y @ 1..=31 => value
                .prop_map(move |value| {
                    (U256::from(value) % (U256::from(2usize).pow(U256::from(y * 8)))).into_token()
                })
                .boxed(),
            _ => panic!("unsupported solidity type uint{}", n),
        },
        ParamType::Bool => value.prop_map(move |value| Token::Bool(value[31] == 1)).boxed(),
        ParamType::String => value
            .prop_map(move |value| {
                // Dictionary words are arbitrary bytes, so they must be validated: building a
                // `str` from invalid UTF-8 is undefined behaviour. Invalid sequences are replaced
                // with U+FFFD instead.
                Token::String(String::from_utf8_lossy(&value[..]).into_owned())
            })
            .boxed(),
        ParamType::Array(param) => {
            proptest::collection::vec(fuzz_param_from_state(param, state), 0..MAX_ARRAY_LEN)
                .prop_map(Token::Array)
                .boxed()
        }
        ParamType::FixedBytes(size) => {
            let size = *size;
            value.prop_map(move |value| Token::FixedBytes(value[32 - size..].to_vec())).boxed()
        }
        ParamType::FixedArray(param, size) => {
            // A fixed array always has exactly `size` elements, so use an exact size rather than
            // a range (which would also exclude `size` itself).
            proptest::collection::vec(fuzz_param_from_state(param, state), *size)
                .prop_map(Token::FixedArray)
                .boxed()
        }
        ParamType::Tuple(params) => params
            .iter()
            .map(|p| fuzz_param_from_state(p, state.clone()))
            .collect::<Vec<_>>()
            .prop_map(Token::Tuple)
            .boxed(),
    }
}
151 changes: 151 additions & 0 deletions evm/src/fuzz/strategies/state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
use super::fuzz_param_from_state;
use crate::executor::StateChangeset;
use bytes::Bytes;
use ethers::{
abi::{Function, RawLog},
types::{H256, U256},
};
use proptest::prelude::{BoxedStrategy, Strategy};
use revm::{
db::{CacheDB, DatabaseRef},
opcode, spec_opcode_gas, SpecId,
};
use std::{cell::RefCell, collections::HashSet, io::Write, rc::Rc};

/// A set of arbitrary 32 byte data from the VM used to generate values for the strategy.
///
/// Wrapped in a shareable container.
///
/// The `Rc<RefCell<..>>` wrapper lets several owners hold a handle to the same set and mutate it
/// through a shared reference; cloning the handle does not copy the set. `Rc` is not thread-safe,
/// so all handles have to stay on one thread.
pub type EvmFuzzState = Rc<RefCell<HashSet<[u8; 32]>>>;

/// Given a function and some state, it returns a strategy which generates valid calldata for the
/// given function's input types, based on state taken from the EVM.
///
/// One parameter strategy is created per function input, each holding its own handle to the
/// shared `state`. The resulting strategy does not shrink.
pub fn fuzz_calldata_from_state(
    func: Function,
    state: EvmFuzzState,
) -> BoxedStrategy<ethers::types::Bytes> {
    // A `Vec` of strategies is itself a strategy producing a `Vec` of values, one per input.
    let mut param_strategies = Vec::with_capacity(func.inputs.len());
    for input in &func.inputs {
        param_strategies.push(fuzz_param_from_state(&input.kind, state.clone()));
    }

    let calldata = param_strategies.prop_map(move |tokens| {
        tracing::trace!(input = ?tokens);
        func.encode_input(&tokens).unwrap().into()
    });

    calldata.no_shrink().boxed()
}

/// Builds the initial [EvmFuzzState] from a database.
pub fn build_initial_state<DB: DatabaseRef>(db: &CacheDB<DB>) -> EvmFuzzState {
let mut state: HashSet<[u8; 32]> = HashSet::new();
for (address, storage) in db.storage() {
let info = db.basic(*address);

// Insert basic account information
state.insert(H256::from(*address).into());
state.insert(u256_to_h256(info.balance).into());
state.insert(u256_to_h256(U256::from(info.nonce)).into());

// Insert storage
for (slot, value) in storage {
state.insert(u256_to_h256(*slot).into());
state.insert(u256_to_h256(*value).into());
}
}

Rc::new(RefCell::new(state))
}

/// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState].
///
/// For every account in `state_changeset` this inserts the address, balance, nonce, all storage
/// slot keys and values, and the push bytes found in the account's code (if any).
///
/// Independently of the changeset, every topic of every log in `logs` is inserted, as well as the
/// log data split into 32 byte chunks. A trailing chunk shorter than 32 bytes is padded with
/// zeroes at the end.
pub fn collect_state_from_call(
    logs: &[RawLog],
    state_changeset: &StateChangeset,
    state: EvmFuzzState,
) {
    let mut dictionary = state.borrow_mut();

    for (address, account) in state_changeset {
        // Insert basic account information
        dictionary.insert(H256::from(*address).into());
        dictionary.insert(u256_to_h256(account.info.balance).into());
        dictionary.insert(u256_to_h256(U256::from(account.info.nonce)).into());

        // Insert storage
        for (slot, value) in &account.storage {
            dictionary.insert(u256_to_h256(*slot).into());
            dictionary.insert(u256_to_h256(*value).into());
        }

        // Insert push bytes
        if let Some(code) = &account.info.code {
            dictionary.extend(collect_push_bytes(code.clone()));
        }
    }

    // Insert log topics and data.
    //
    // This is deliberately done outside of the account loop: logs do not depend on the account
    // being visited, so they only need to be scanned once, and they must still be collected when
    // the changeset is empty.
    for log in logs {
        for topic in &log.topics {
            dictionary.insert(topic.0);
        }

        for chunk in log.data.chunks(32) {
            let mut buffer: [u8; 32] = [0; 32];
            let _ = (&mut buffer[..])
                .write(chunk)
                .expect("log data chunk was larger than 32 bytes");
            dictionary.insert(buffer);
        }
    }
}

/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
///
/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
/// bytecode (as is the case with Solmate).
///
/// The limit applies to the offset of the opcode being inspected: no opcode at or beyond this
/// offset is looked at.
const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;

/// Collects all push bytes from the given bytecode.
///
/// Every push immediate is returned as a 32 byte word with the immediate right-aligned (i.e.
/// left-padded with zeroes), which is how the EVM places the value on the stack. This keeps the
/// words consistent with how dictionary values are decoded: addresses, integers, booleans and
/// fixed bytes are all taken from the trailing bytes of a word.
///
/// Only opcodes located within the first [PUSH_BYTE_ANALYSIS_LIMIT] bytes of `code` are
/// inspected. If a push instruction is truncated (its immediate runs past the end of the code),
/// scanning stops and the words collected so far are returned.
fn collect_push_bytes(code: Bytes) -> Vec<[u8; 32]> {
    let mut words: Vec<[u8; 32]> = Vec::new();

    // We use [SpecId::LATEST] since we do not really care what spec it is - we are not interested
    // in gas costs.
    let opcode_infos = spec_opcode_gas(SpecId::LATEST);

    // The scan bound does not change while iterating, so compute it once.
    let scan_end = code.len().min(PUSH_BYTE_ANALYSIS_LIMIT);

    let mut i = 0;
    while i < scan_end {
        let op = code[i];
        if opcode_infos[op as usize].is_push {
            // NOTE(review): assumes `is_push` is only set for `PUSH1..=PUSH32`, so that
            // `push_size` is in `1..=32` - confirm against the revm opcode table in use.
            let push_size = (op - opcode::PUSH1 + 1) as usize;
            let push_start = i + 1;
            let push_end = push_start + push_size;

            // As a precaution, if a fuzz test deploys malformed bytecode (such as using `CREATE2`)
            // this will terminate the loop early. `push_end > push_start`, so checking the end
            // of the immediate is sufficient.
            if push_end > code.len() {
                return words
            }

            // Right-align the immediate inside the word.
            let mut buffer: [u8; 32] = [0; 32];
            buffer[32 - push_size..].copy_from_slice(&code[push_start..push_end]);
            words.push(buffer);

            // Skip over the immediate; the opcode itself is skipped below.
            i += push_size;
        }
        i += 1;
    }

    words
}

/// Small helper function to convert [U256] into [H256].
///
/// The value is written big-endian (most significant byte first). That is the byte order the
/// rest of the fuzz dictionary relies on: addresses are left-padded to 32 bytes, and 32 byte words
/// are turned back into integers with `U256::from`, which reads them big-endian. Using any other
/// byte order here would make balances, nonces and storage values come back byte-reversed.
fn u256_to_h256(u: U256) -> H256 {
    let mut h = H256::default();
    u.to_big_endian(h.as_mut());
    h
}

0 comments on commit 2837663

Please sign in to comment.