REVM fuzz dictionary (#985)

* feat: fuzz dictionary Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com> * fix: handle malformed bytecode * fix: limit search for push bytes * feat: collect fuzz state from logs * feat: build initial fuzz state from db * perf: use `Index` instead of `Selector` Co-authored-by: brockelmore <31553173+brockelmore@users.noreply.github.com>
foundry-rs · Mar 22, 2022 · 2837663 · 2837663
1 parent b672c91
commit 2837663
Show file tree

Hide file tree

Showing 6 changed files with 270 additions and 19 deletions.
diff --git a/evm/src/executor/mod.rs b/evm/src/executor/mod.rs
@@ -41,6 +41,9 @@ use revm::{
 };
 use std::collections::BTreeMap;
 
+/// A mapping of addresses to their changed state.
+pub type StateChangeset = HashMap<Address, Account>;
+
 #[derive(thiserror::Error, Debug)]
 pub enum EvmError {
  /// Error which occurred during execution of a transaction
@@ -54,7 +57,7 @@ pub enum EvmError {
  traces: Option<CallTraceArena>,
  debug: Option<DebugArena>,
  labels: BTreeMap<Address, String>,
- state_changeset: HashMap<Address, Account>,
+ state_changeset: StateChangeset,
  },
  /// Error which occurred during ABI encoding/decoding
  #[error(transparent)]
@@ -102,7 +105,7 @@ pub struct CallResult<D: Detokenize> {
  ///
  /// This is only present if the changed state was not committed to the database (i.e. if you
  /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
- pub state_changeset: HashMap<Address, Account>,
+ pub state_changeset: StateChangeset,
 }
 
 /// The result of a raw call.
@@ -130,7 +133,7 @@ pub struct RawCallResult {
  ///
  /// This is only present if the changed state was not committed to the database (i.e. if you
  /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
- pub state_changeset: HashMap<Address, Account>,
+ pub state_changeset: StateChangeset,
 }
 
 impl Default for RawCallResult {
@@ -145,7 +148,7 @@ impl Default for RawCallResult {
  labels: BTreeMap::new(),
  traces: None,
  debug: None,
- state_changeset: HashMap::new(),
+ state_changeset: StateChangeset::new(),
  }
  }
 }
@@ -159,7 +162,7 @@ pub struct Executor<DB: DatabaseRef> {
  // Also, if we stored the VM here we would still need to
  // take `&mut self` when we are not committing to the database, since
  // we need to set `evm.env`.
- db: CacheDB<DB>,
+ pub(crate) db: CacheDB<DB>,
  env: Env,
  inspector_config: InspectorStackConfig,
 }
@@ -411,7 +414,7 @@ where
  &self,
  address: Address,
  reverted: bool,
- state_changeset: HashMap<Address, Account>,
+ state_changeset: StateChangeset,
  should_fail: bool,
  ) -> bool {
  // Construct a new VM with the state changeset

diff --git a/evm/src/fuzz/mod.rs b/evm/src/fuzz/mod.rs
@@ -14,7 +14,10 @@ use proptest::test_runner::{TestCaseError, TestError, TestRunner};
 use revm::db::DatabaseRef;
 use serde::{Deserialize, Serialize};
 use std::{cell::RefCell, collections::BTreeMap, fmt};
-use strategies::fuzz_calldata;
+use strategies::{
+ build_initial_state, collect_state_from_call, fuzz_calldata, fuzz_calldata_from_state,
+ EvmFuzzState,
+};
 
 /// Magic return code for the `assume` cheatcode
 pub const ASSUME_MAGIC_RETURN_CODE: &[u8] = "FOUNDRY::ASSUME".as_bytes();
@@ -54,14 +57,21 @@ where
  should_fail: bool,
  errors: Option<&Abi>,
  ) -> FuzzTestResult {
- let strat = fuzz_calldata(func);
-
  // Stores the consumed gas and calldata of every successful fuzz call
  let cases: RefCell<Vec<FuzzCase>> = RefCell::new(Default::default());
 
  // Stores the result of the last call
  let call: RefCell<RawCallResult> = RefCell::new(Default::default());
 
+ // Stores fuzz state for use with [fuzz_calldata_from_state]
+ let state: EvmFuzzState = build_initial_state(&self.executor.db);
+
+ // TODO: We should have a `FuzzerOpts` struct where we can configure the fuzzer. When we
+ // have that, we should add a way to configure strategy weights
+ let strat = proptest::strategy::Union::new_weighted(vec![
+ (60, fuzz_calldata(func.clone())),
+ (40, fuzz_calldata_from_state(func.clone(), state.clone())),
+ ]);
  tracing::debug!(func = ?func.name, should_fail, "fuzzing");
  let run_result = self.runner.clone().run(&strat, |calldata| {
  *call.borrow_mut() = self
@@ -70,6 +80,9 @@ where
  .expect("could not make raw evm call");
  let call = call.borrow();
 
+ // Build fuzzer state
+ collect_state_from_call(&call.logs, &call.state_changeset, state.clone());
+
  // When assume cheat code is triggered return a special string "FOUNDRY::ASSUME"
  if call.result.as_ref() == ASSUME_MAGIC_RETURN_CODE {
  return Err(TestCaseError::reject("ASSUME: Too many rejects"))

diff --git a/evm/src/fuzz/strategies/calldata.rs b/evm/src/fuzz/strategies/calldata.rs
@@ -1,16 +1,18 @@
 use super::fuzz_param;
 use ethers::{abi::Function, types::Bytes};
-use proptest::prelude::Strategy;
+use proptest::prelude::{BoxedStrategy, Strategy};
 
-/// Given a function, it returns a proptest strategy which generates valid abi-encoded calldata
+/// Given a function, it returns a boxed strategy which generates valid ABI-encoded calldata
 /// for that function's input types.
-pub fn fuzz_calldata(func: &Function) -> impl Strategy<Value = Bytes> + '_ {
+pub fn fuzz_calldata(func: Function) -> BoxedStrategy<Bytes> {
  // We need to compose all the strategies generated for each parameter in all
  // possible combinations
  let strats = func.inputs.iter().map(|input| fuzz_param(&input.kind)).collect::<Vec<_>>();
 
- strats.prop_map(move |tokens| {
- tracing::trace!(input = ?tokens);
- func.encode_input(&tokens).unwrap().into()
- })
+ strats
+ .prop_map(move |tokens| {
+ tracing::trace!(input = ?tokens);
+ func.encode_input(&tokens).unwrap().into() // panics if encoding the tokens fails
+ })
+ .boxed()
 }
diff --git a/evm/src/fuzz/strategies/mod.rs b/evm/src/fuzz/strategies/mod.rs
@@ -2,7 +2,12 @@ mod uint;
 pub use uint::UintStrategy;
 
 mod param;
-pub use param::fuzz_param;
+pub use param::{fuzz_param, fuzz_param_from_state};
 
 mod calldata;
 pub use calldata::fuzz_calldata;
+
+mod state;
+pub use state::{
+ build_initial_state, collect_state_from_call, fuzz_calldata_from_state, EvmFuzzState,
+};
diff --git a/evm/src/fuzz/strategies/param.rs b/evm/src/fuzz/strategies/param.rs
@@ -4,11 +4,14 @@ use ethers::{
 };
 use proptest::prelude::*;
 
+use super::state::EvmFuzzState;
+
 /// The max length of arrays we fuzz for is 256.
 pub const MAX_ARRAY_LEN: usize = 256;
 
-/// Given an ethabi parameter type, returns a proptest strategy for generating values for that
-/// datatype. Works with ABI Encoder v2 tuples.
+/// Given a parameter type, returns a strategy for generating values for that type.
+///
+/// Works with ABI Encoder v2 tuples.
 pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
  match param {
  ParamType::Address => {
@@ -63,3 +66,77 @@ pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
  }
  }
 }
+
+/// Given a parameter type, returns a strategy that builds values of that type out of 32 byte
+/// words sampled from the given EVM fuzz state.
+///
+/// The word is looked up every time a value is generated, so words that were added to the state
+/// after the strategy was created can be sampled as well. If the state is empty, the zero word
+/// is used instead.
+///
+/// Works with ABI Encoder v2 tuples.
+///
+/// # Panics
+///
+/// Panics for `int`/`uint` types whose byte width (`n / 8`) is outside of `1..=32`.
+pub fn fuzz_param_from_state(param: &ParamType, state: EvmFuzzState) -> BoxedStrategy<Token> {
+    // The returned strategy has to own everything it uses, so it gets its own handle to the
+    // shared state
+    let dict = state.clone();
+
+    // Select a word from the state. The size of the state is read at generation time rather
+    // than when the strategy is built, since the state is a shared container that can be
+    // extended after this strategy has been created
+    let value = any::<prop::sample::Index>().prop_map(move |index| {
+        let words = dict.borrow();
+        if words.is_empty() {
+            // `Index::index` panics for a size of zero, so fall back to the zero word
+            [0; 32]
+        } else {
+            let position = index.index(words.len());
+            let word = words.iter().nth(position).copied();
+            word.expect("position is below the size of the state")
+        }
+    });
+
+    // Convert the word based on the parameter type
+    match param {
+        ParamType::Address => {
+            value.prop_map(move |value| Address::from_slice(&value[12..]).into_token()).boxed()
+        }
+        ParamType::Bytes => value.prop_map(move |value| Bytes::from(value).into_token()).boxed(),
+        ParamType::Int(n) => match n / 8 {
+            32 => {
+                value.prop_map(move |value| I256::from_raw(U256::from(value)).into_token()).boxed()
+            }
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    // Generate a uintN in the correct range, then shift it to the range of intN
+                    // by subtracting 2^(N-1)
+                    let uint = U256::from(value) % U256::from(2usize).pow(U256::from(y * 8));
+                    let max_int_plus1 = U256::from(2usize).pow(U256::from(y * 8 - 1));
+                    let num = I256::from_raw(uint.overflowing_sub(max_int_plus1).0);
+                    num.into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type int{}", n),
+        },
+        ParamType::Uint(n) => match n / 8 {
+            32 => value.prop_map(move |value| U256::from(value).into_token()).boxed(),
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    (U256::from(value) % (U256::from(2usize).pow(U256::from(y * 8)))).into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type uint{}", n),
+        },
+        ParamType::Bool => value.prop_map(move |value| Token::Bool(value[31] == 1)).boxed(),
+        ParamType::String => value
+            .prop_map(move |value| {
+                // State words are arbitrary bytes and not guaranteed to be valid UTF-8, so
+                // invalid sequences are replaced instead of being trusted blindly
+                Token::String(String::from_utf8_lossy(&value[..]).into_owned())
+            })
+            .boxed(),
+        ParamType::Array(param) => {
+            proptest::collection::vec(fuzz_param_from_state(param, state), 0..MAX_ARRAY_LEN)
+                .prop_map(Token::Array)
+                .boxed()
+        }
+        ParamType::FixedBytes(size) => {
+            let size = *size;
+            value.prop_map(move |value| Token::FixedBytes(value[32 - size..].to_vec())).boxed()
+        }
+        ParamType::FixedArray(param, size) => {
+            // A fixed array always has exactly `size` elements
+            proptest::collection::vec(fuzz_param_from_state(param, state), *size)
+                .prop_map(Token::FixedArray)
+                .boxed()
+        }
+        ParamType::Tuple(params) => params
+            .iter()
+            .map(|p| fuzz_param_from_state(p, state.clone()))
+            .collect::<Vec<_>>()
+            .prop_map(Token::Tuple)
+            .boxed(),
+    }
+}
diff --git a/evm/src/fuzz/strategies/state.rs b/evm/src/fuzz/strategies/state.rs
@@ -0,0 +1,151 @@
+use super::fuzz_param_from_state;
+use crate::executor::StateChangeset;
+use bytes::Bytes;
+use ethers::{
+ abi::{Function, RawLog},
+ types::{H256, U256},
+};
+use proptest::prelude::{BoxedStrategy, Strategy};
+use revm::{
+ db::{CacheDB, DatabaseRef},
+ opcode, spec_opcode_gas, SpecId,
+};
+use std::{cell::RefCell, collections::HashSet, io::Write, rc::Rc};
+
+/// A set of arbitrary 32 byte words taken from the VM, used to generate values for the strategy.
+///
+/// Wrapped in `Rc<RefCell<..>>` so that cloned handles share and mutate one and the same set.
+pub type EvmFuzzState = Rc<RefCell<HashSet<[u8; 32]>>>;
+
+/// Builds a strategy that generates valid calldata for `func`, with every input value derived
+/// from the given EVM fuzz state.
+///
+/// One [fuzz_param_from_state] strategy is created per function input, and the generated tokens
+/// are encoded as the function's input. Shrinking is disabled for the returned strategy.
+pub fn fuzz_calldata_from_state(
+    func: Function,
+    state: EvmFuzzState,
+) -> BoxedStrategy<ethers::types::Bytes> {
+    // One state-backed strategy per input, in declaration order
+    let mut param_strategies = Vec::with_capacity(func.inputs.len());
+    for input in &func.inputs {
+        param_strategies.push(fuzz_param_from_state(&input.kind, state.clone()));
+    }
+
+    let calldata = param_strategies.prop_map(move |tokens| {
+        tracing::trace!(input = ?tokens);
+        let encoded = func.encode_input(&tokens).unwrap();
+        encoded.into()
+    });
+
+    calldata.no_shrink().boxed()
+}
+
+/// Creates the starting [EvmFuzzState] from the contents of a database.
+///
+/// For every entry yielded by `db.storage()`, the address, the balance and nonce reported by
+/// `db.basic`, and each storage slot together with its value are added as 32 byte words.
+pub fn build_initial_state<DB: DatabaseRef>(db: &CacheDB<DB>) -> EvmFuzzState {
+    // Shorthand for turning a number into a dictionary word
+    let word = |number: U256| -> [u8; 32] { u256_to_h256(number).into() };
+
+    let mut words: HashSet<[u8; 32]> = HashSet::new();
+    for (address, storage) in db.storage() {
+        // Basic account information: address, balance and nonce
+        let account = db.basic(*address);
+        words.insert(H256::from(*address).into());
+        words.insert(word(account.balance));
+        words.insert(word(U256::from(account.nonce)));
+
+        // Storage: both the slot and the value stored in it
+        for (slot, value) in storage {
+            words.insert(word(*slot));
+            words.insert(word(*value));
+        }
+    }
+
+    Rc::new(RefCell::new(words))
+}
+
+/// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState].
+///
+/// For every account in the changeset this records the address, balance, nonce, each storage
+/// slot with its value, and the push bytes found in the account's code. Log topics and log data
+/// (split into 32 byte chunks, a shorter final chunk being zero-padded on the right) are
+/// recorded once per call, regardless of how many accounts the changeset contains.
+///
+/// # Panics
+///
+/// Panics if `state` is currently borrowed elsewhere.
+pub fn collect_state_from_call(
+    logs: &[RawLog],
+    state_changeset: &StateChangeset,
+    state: EvmFuzzState,
+) {
+    let state = &mut *state.borrow_mut();
+
+    for (address, account) in state_changeset {
+        // Insert basic account information
+        state.insert(H256::from(*address).into());
+        state.insert(u256_to_h256(account.info.balance).into());
+        state.insert(u256_to_h256(U256::from(account.info.nonce)).into());
+
+        // Insert storage
+        for (slot, value) in &account.storage {
+            state.insert(u256_to_h256(*slot).into());
+            state.insert(u256_to_h256(*value).into());
+        }
+
+        // Insert push bytes
+        if let Some(code) = &account.info.code {
+            for push_byte in collect_push_bytes(code.clone()) {
+                state.insert(push_byte);
+            }
+        }
+    }
+
+    // Insert log topics and data. Logs belong to the call as a whole, so they are handled
+    // outside of the account loop: they are neither re-inserted once per account nor skipped
+    // when the changeset is empty
+    for log in logs {
+        log.topics.iter().for_each(|topic| {
+            state.insert(topic.0);
+        });
+        log.data.chunks(32).for_each(|chunk| {
+            let mut buffer: [u8; 32] = [0; 32];
+            let _ = (&mut buffer[..])
+                .write(chunk)
+                .expect("log data chunk was larger than 32 bytes");
+            state.insert(buffer);
+        });
+    }
+}
+
+/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
+///
+/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
+/// bytecode (as is the case with Solmate).
+const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
+
+/// Collects the immediate data of all push instructions in the given bytecode.
+///
+/// Each immediate is returned as a 32 byte word with the pushed bytes right-aligned (padded
+/// with zeros on the left). This is the layout the words are read back with, e.g. an address is
+/// taken from the last 20 bytes of a word and numbers are read as big-endian.
+///
+/// Only opcodes within the first [PUSH_BYTE_ANALYSIS_LIMIT] bytes are inspected, and collection
+/// stops at the first push whose immediate data runs past the end of the bytecode.
+fn collect_push_bytes(code: Bytes) -> Vec<[u8; 32]> {
+    let mut bytes: Vec<[u8; 32]> = Vec::new();
+
+    // We use [SpecId::LATEST] since we do not really care what spec it is - we are not interested
+    // in gas costs.
+    let opcode_infos = spec_opcode_gas(SpecId::LATEST);
+
+    let mut i = 0;
+    while i < code.len().min(PUSH_BYTE_ANALYSIS_LIMIT) {
+        let op = code[i];
+        if opcode_infos[op as usize].is_push {
+            let push_size = (op - opcode::PUSH1 + 1) as usize;
+            let push_start = i + 1;
+            let push_end = push_start + push_size;
+
+            // As a precaution, if a fuzz test deploys malformed bytecode (such as using `CREATE2`)
+            // this will terminate the loop early.
+            if push_end > code.len() {
+                return bytes
+            }
+
+            // Right-align the immediate so that it keeps its numeric value when the word is
+            // interpreted as a big-endian number
+            let mut buffer: [u8; 32] = [0; 32];
+            let _ = (&mut buffer[32 - push_size..])
+                .write(&code[push_start..push_end])
+                .expect("push was larger than 32 bytes");
+            bytes.push(buffer);
+            i += push_size;
+        }
+        i += 1;
+    }
+
+    bytes
+}
+
+/// Small helper function to convert [U256] into [H256].
+///
+/// The number is written in big-endian byte order, which is the order `U256::from([u8; 32])`
+/// reads the bytes back in, so the conversion round-trips.
+fn u256_to_h256(u: U256) -> H256 {
+    let mut h = H256::default();
+    u.to_big_endian(h.as_mut());
+    h
+}