From 283766316659689740ee3398c7b5c1d67e548d73 Mon Sep 17 00:00:00 2001 From: Bjerg Date: Mon, 21 Mar 2022 03:46:08 +0100 Subject: [PATCH] REVM fuzz dictionary (#985) * feat: fuzz dictionary Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com> * fix: handle malformed bytecode * fix: limit search for push bytes * feat: collect fuzz state from logs * feat: build initial fuzz state from db * perf: use `Index` instead of `Selector` Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com> --- evm/src/executor/mod.rs | 15 +-- evm/src/fuzz/mod.rs | 19 +++- evm/src/fuzz/strategies/calldata.rs | 16 +-- evm/src/fuzz/strategies/mod.rs | 7 +- evm/src/fuzz/strategies/param.rs | 81 ++++++++++++++- evm/src/fuzz/strategies/state.rs | 151 ++++++++++++++++++++++++++++ 6 files changed, 270 insertions(+), 19 deletions(-) create mode 100644 evm/src/fuzz/strategies/state.rs diff --git a/evm/src/executor/mod.rs b/evm/src/executor/mod.rs index d2b7c735d401..231d633b58c3 100644 --- a/evm/src/executor/mod.rs +++ b/evm/src/executor/mod.rs @@ -41,6 +41,9 @@ use revm::{ }; use std::collections::BTreeMap; +/// A mapping of addresses to their changed state. +pub type StateChangeset = HashMap; + #[derive(thiserror::Error, Debug)] pub enum EvmError { /// Error which occurred during execution of a transaction @@ -54,7 +57,7 @@ pub enum EvmError { traces: Option, debug: Option, labels: BTreeMap, - state_changeset: HashMap, + state_changeset: StateChangeset, }, /// Error which occurred during ABI encoding/decoding #[error(transparent)] @@ -102,7 +105,7 @@ pub struct CallResult { /// /// This is only present if the changed state was not committed to the database (i.e. if you /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`). - pub state_changeset: HashMap, + pub state_changeset: StateChangeset, } /// The result of a raw call. 
@@ -130,7 +133,7 @@ pub struct RawCallResult { /// /// This is only present if the changed state was not committed to the database (i.e. if you /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`). - pub state_changeset: HashMap, + pub state_changeset: StateChangeset, } impl Default for RawCallResult { @@ -145,7 +148,7 @@ impl Default for RawCallResult { labels: BTreeMap::new(), traces: None, debug: None, - state_changeset: HashMap::new(), + state_changeset: StateChangeset::new(), } } } @@ -159,7 +162,7 @@ pub struct Executor { // Also, if we stored the VM here we would still need to // take `&mut self` when we are not committing to the database, since // we need to set `evm.env`. - db: CacheDB, + pub(crate) db: CacheDB, env: Env, inspector_config: InspectorStackConfig, } @@ -411,7 +414,7 @@ where &self, address: Address, reverted: bool, - state_changeset: HashMap, + state_changeset: StateChangeset, should_fail: bool, ) -> bool { // Construct a new VM with the state changeset diff --git a/evm/src/fuzz/mod.rs b/evm/src/fuzz/mod.rs index fdfa1f64046a..ad358dcbbb13 100644 --- a/evm/src/fuzz/mod.rs +++ b/evm/src/fuzz/mod.rs @@ -14,7 +14,10 @@ use proptest::test_runner::{TestCaseError, TestError, TestRunner}; use revm::db::DatabaseRef; use serde::{Deserialize, Serialize}; use std::{cell::RefCell, collections::BTreeMap, fmt}; -use strategies::fuzz_calldata; +use strategies::{ + build_initial_state, collect_state_from_call, fuzz_calldata, fuzz_calldata_from_state, + EvmFuzzState, +}; /// Magic return code for the `assume` cheatcode pub const ASSUME_MAGIC_RETURN_CODE: &[u8] = "FOUNDRY::ASSUME".as_bytes(); @@ -54,14 +57,21 @@ where should_fail: bool, errors: Option<&Abi>, ) -> FuzzTestResult { - let strat = fuzz_calldata(func); - // Stores the consumed gas and calldata of every successful fuzz call let cases: RefCell> = RefCell::new(Default::default()); // Stores the result of the last call let call: RefCell = RefCell::new(Default::default()); + // 
Stores fuzz state for use with [fuzz_calldata_from_state] + let state: EvmFuzzState = build_initial_state(&self.executor.db); + + // TODO: We should have a `FuzzerOpts` struct where we can configure the fuzzer. When we + // have that, we should add a way to configure strategy weights + let strat = proptest::strategy::Union::new_weighted(vec![ + (60, fuzz_calldata(func.clone())), + (40, fuzz_calldata_from_state(func.clone(), state.clone())), + ]); tracing::debug!(func = ?func.name, should_fail, "fuzzing"); let run_result = self.runner.clone().run(&strat, |calldata| { *call.borrow_mut() = self @@ -70,6 +80,9 @@ where .expect("could not make raw evm call"); let call = call.borrow(); + // Build fuzzer state + collect_state_from_call(&call.logs, &call.state_changeset, state.clone()); + // When assume cheat code is triggered return a special string "FOUNDRY::ASSUME" if call.result.as_ref() == ASSUME_MAGIC_RETURN_CODE { return Err(TestCaseError::reject("ASSUME: Too many rejects")) diff --git a/evm/src/fuzz/strategies/calldata.rs b/evm/src/fuzz/strategies/calldata.rs index f3c332138809..be9e24a7289b 100644 --- a/evm/src/fuzz/strategies/calldata.rs +++ b/evm/src/fuzz/strategies/calldata.rs @@ -1,16 +1,18 @@ use super::fuzz_param; use ethers::{abi::Function, types::Bytes}; -use proptest::prelude::Strategy; +use proptest::prelude::{BoxedStrategy, Strategy}; -/// Given a function, it returns a proptest strategy which generates valid abi-encoded calldata +/// Given a function, it returns a strategy which generates valid calldata /// for that function's input types. 
-pub fn fuzz_calldata(func: &Function) -> impl Strategy<Value = Bytes> + '_ {
+pub fn fuzz_calldata(func: Function) -> BoxedStrategy<Bytes> {
     // We need to compose all the strategies generated for each parameter in all
     // possible combinations
     let strats = func.inputs.iter().map(|input| fuzz_param(&input.kind)).collect::<Vec<_>>();
 
-    strats.prop_map(move |tokens| {
-        tracing::trace!(input = ?tokens);
-        func.encode_input(&tokens).unwrap().into()
-    })
+    strats
+        .prop_map(move |tokens| {
+            tracing::trace!(input = ?tokens);
+            func.encode_input(&tokens).unwrap().into()
+        })
+        .boxed()
 }
diff --git a/evm/src/fuzz/strategies/mod.rs b/evm/src/fuzz/strategies/mod.rs
index 10a4a3bb5578..4c5d31d6835b 100644
--- a/evm/src/fuzz/strategies/mod.rs
+++ b/evm/src/fuzz/strategies/mod.rs
@@ -2,7 +2,12 @@ mod uint;
 pub use uint::UintStrategy;
 
 mod param;
-pub use param::fuzz_param;
+pub use param::{fuzz_param, fuzz_param_from_state};
 
 mod calldata;
 pub use calldata::fuzz_calldata;
+
+mod state;
+pub use state::{
+    build_initial_state, collect_state_from_call, fuzz_calldata_from_state, EvmFuzzState,
+};
diff --git a/evm/src/fuzz/strategies/param.rs b/evm/src/fuzz/strategies/param.rs
index cc4376c8045d..e86b15e85c86 100644
--- a/evm/src/fuzz/strategies/param.rs
+++ b/evm/src/fuzz/strategies/param.rs
@@ -4,11 +4,14 @@ use ethers::{
 };
 use proptest::prelude::*;
 
+use super::state::EvmFuzzState;
+
 /// The max length of arrays we fuzz for is 256.
 pub const MAX_ARRAY_LEN: usize = 256;
 
-/// Given an ethabi parameter type, returns a proptest strategy for generating values for that
-/// datatype. Works with ABI Encoder v2 tuples.
+/// Given a parameter type, returns a strategy for generating values for that type.
+///
+/// Works with ABI Encoder v2 tuples.
 pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
     match param {
         ParamType::Address => {
@@ -63,3 +66,77 @@ pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
         }
     }
 }
+
+/// Given a parameter type, returns a strategy for generating values for that type, given some EVM
+/// fuzz state.
+///
+/// Works with ABI Encoder v2 tuples.
+///
+/// Panics if `param` is an `Int` or `Uint` whose bit width divided by 8 is not within `1..=32`.
+pub fn fuzz_param_from_state(param: &ParamType, state: EvmFuzzState) -> BoxedStrategy<Token> {
+    // Select a value from the state. The set is read at draw time rather than when the strategy
+    // is built, so values collected by earlier runs are eligible; an empty set yields zeroes.
+    let s = state.clone();
+    let value = any::<prop::sample::Index>().prop_map(move |index| {
+        let set = s.borrow();
+        if set.is_empty() { [0u8; 32] } else { *set.iter().nth(index.index(set.len())).unwrap() }
+    });
+
+    // Convert the value based on the parameter type
+    match param {
+        ParamType::Address => {
+            value.prop_map(move |value| Address::from_slice(&value[12..]).into_token()).boxed()
+        }
+        ParamType::Bytes => value.prop_map(move |value| Bytes::from(value).into_token()).boxed(),
+        ParamType::Int(n) => match n / 8 {
+            32 => {
+                value.prop_map(move |value| I256::from_raw(U256::from(value)).into_token()).boxed()
+            }
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    // Generate a uintN in the correct range, then shift it to the range of intN
+                    // by subtracting 2^(N-1)
+                    let uint = U256::from(value) % U256::from(2usize).pow(U256::from(y * 8));
+                    let max_int_plus1 = U256::from(2usize).pow(U256::from(y * 8 - 1));
+                    let num = I256::from_raw(uint.overflowing_sub(max_int_plus1).0);
+                    num.into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type int{}", n),
+        },
+        ParamType::Uint(n) => match n / 8 {
+            32 => value.prop_map(move |value| U256::from(value).into_token()).boxed(),
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    (U256::from(value) % (U256::from(2usize).pow(U256::from(y * 8)))).into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type uint{}", n),
+        },
+        ParamType::Bool => value.prop_map(move |value| Token::Bool(value[31] == 1)).boxed(),
+        ParamType::String => value
+            .prop_map(move |value| Token::String(String::from_utf8_lossy(&value[..]).into_owned()))
+            .boxed(),
+        ParamType::Array(param) => {
+            proptest::collection::vec(fuzz_param_from_state(param, state), 0..MAX_ARRAY_LEN)
+                .prop_map(Token::Array)
+                .boxed()
+        }
+        ParamType::FixedBytes(size) => {
+            let size = *size;
+            value.prop_map(move |value| Token::FixedBytes(value[32 - size..].to_vec())).boxed()
+        }
+        ParamType::FixedArray(param, size) => {
+            // A fixed-size array must contain exactly `size` elements
+            proptest::collection::vec(fuzz_param_from_state(param, state), *size)
+                .prop_map(Token::FixedArray)
+                .boxed()
+        }
+        ParamType::Tuple(params) => params
+            .iter()
+            .map(|p| fuzz_param_from_state(p, state.clone()))
+            .collect::<Vec<_>>()
+            .prop_map(Token::Tuple)
+            .boxed(),
+    }
+}
diff --git a/evm/src/fuzz/strategies/state.rs b/evm/src/fuzz/strategies/state.rs
new file mode 100644
index 000000000000..2ef824cb832f
--- /dev/null
+++ b/evm/src/fuzz/strategies/state.rs
@@ -0,0 +1,151 @@
+use super::fuzz_param_from_state;
+use crate::executor::StateChangeset;
+use bytes::Bytes;
+use ethers::{
+    abi::{Function, RawLog},
+    types::{H256, U256},
+};
+use proptest::prelude::{BoxedStrategy, Strategy};
+use revm::{
+    db::{CacheDB, DatabaseRef},
+    opcode, spec_opcode_gas, SpecId,
+};
+use std::{cell::RefCell, collections::HashSet, io::Write, rc::Rc};
+
+/// A set of arbitrary 32 byte data from the VM used to generate values for the strategy.
+///
+/// Wrapped in a shareable container.
+pub type EvmFuzzState = Rc<RefCell<HashSet<[u8; 32]>>>;
+
+/// Given a function and some state, it returns a strategy which generates valid calldata for the
+/// given function's input types, based on state taken from the EVM.
+pub fn fuzz_calldata_from_state(
+    func: Function,
+    state: EvmFuzzState,
+) -> BoxedStrategy<ethers::types::Bytes> {
+    let strats = func
+        .inputs
+        .iter()
+        .map(|input| fuzz_param_from_state(&input.kind, state.clone()))
+        .collect::<Vec<_>>();
+
+    strats
+        .prop_map(move |tokens| {
+            tracing::trace!(input = ?tokens);
+            func.encode_input(&tokens).unwrap().into()
+        })
+        .no_shrink()
+        .boxed()
+}
+
+/// Builds the initial [EvmFuzzState] from every account that `db.storage()` yields.
+pub fn build_initial_state<DB: DatabaseRef>(db: &CacheDB<DB>) -> EvmFuzzState {
+    let mut state: HashSet<[u8; 32]> = HashSet::new();
+    for (address, storage) in db.storage() {
+        let info = db.basic(*address);
+
+        // Insert basic account information (address, balance and nonce, one word each)
+        state.insert(H256::from(*address).into());
+        state.insert(u256_to_h256(info.balance).into());
+        state.insert(u256_to_h256(U256::from(info.nonce)).into());
+
+        // Insert storage (both the slot key and the stored value)
+        for (slot, value) in storage {
+            state.insert(u256_to_h256(*slot).into());
+            state.insert(u256_to_h256(*value).into());
+        }
+    }
+
+    Rc::new(RefCell::new(state))
+}
+
+/// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState].
+///
+/// Logs are read once per call; account info, storage and push bytes once per changed account.
+pub fn collect_state_from_call(
+    logs: &[RawLog],
+    state_changeset: &StateChangeset,
+    state: EvmFuzzState,
+) {
+    let state = &mut *state.borrow_mut();
+
+    for (address, account) in state_changeset {
+        // Insert basic account information
+        state.insert(H256::from(*address).into());
+        state.insert(u256_to_h256(account.info.balance).into());
+        state.insert(u256_to_h256(U256::from(account.info.nonce)).into());
+
+        // Insert storage
+        for (slot, value) in &account.storage {
+            state.insert(u256_to_h256(*slot).into());
+            state.insert(u256_to_h256(*value).into());
+        }
+
+        // Insert push bytes
+        if let Some(code) = &account.info.code {
+            state.extend(collect_push_bytes(code.clone()));
+        }
+    }
+
+    // Insert log topics and data (once per call, not once per changed account)
+    for log in logs {
+        state.extend(log.topics.iter().map(|topic| topic.0));
+        log.data.chunks(32).for_each(|chunk| {
+            let mut buffer: [u8; 32] = [0; 32];
+            let _ = (&mut buffer[..])
+                .write(chunk)
+                .expect("log data chunk was larger than 32 bytes");
+            state.insert(buffer);
+        });
+    }
+}
+
+/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
+///
+/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
+/// bytecode (as is the case with Solmate).
+const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
+
+/// Collects all push bytes from the first [PUSH_BYTE_ANALYSIS_LIMIT] bytes of the given bytecode.
+///
+/// Immediates are right-aligned in their 32-byte word; scanning stops at a truncated push.
+fn collect_push_bytes(code: Bytes) -> Vec<[u8; 32]> {
+    let mut bytes: Vec<[u8; 32]> = Vec::new();
+
+    // We use [SpecId::LATEST] since we do not really care what spec it is - we are not interested
+    // in gas costs.
+    let opcode_infos = spec_opcode_gas(SpecId::LATEST);
+
+    let mut i = 0;
+    while i < code.len().min(PUSH_BYTE_ANALYSIS_LIMIT) {
+        let op = code[i];
+        if opcode_infos[op as usize].is_push {
+            let push_size = (op - opcode::PUSH1 + 1) as usize;
+            let push_start = i + 1;
+            let push_end = push_start + push_size;
+
+            // As a precaution, if a fuzz test deploys malformed bytecode (such as using `CREATE2`)
+            // this will terminate the loop early.
+            if push_end > code.len() {
+                return bytes
+            }
+
+            let mut buffer: [u8; 32] = [0; 32];
+            let _ = (&mut buffer[32 - push_size..])
+                .write(&code[push_start..push_end])
+                .expect("push was larger than 32 bytes");
+            bytes.push(buffer);
+            i += push_size;
+        }
+        i += 1;
+    }
+
+    bytes
+}
+
+/// Small helper function to convert [U256] into an [H256] holding its big-endian encoding.
+fn u256_to_h256(u: U256) -> H256 {
+    let mut h = H256::default();
+    u.to_big_endian(h.as_mut());
+    h
+}