AFLplusplus · domenukk · Dec 28, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -56,28 +56,13 @@ license = "MIT OR Apache-2.0"
 # Internal deps
 libafl = { path = "./libafl", version = "0.14.1", default-features = false }
 libafl_bolts = { path = "./libafl_bolts", version = "0.14.1", default-features = false }
-libafl_cc = { path = "./libafl_cc", version = "0.14.1", default-features = false }
-symcc_runtime = { path = "./libafl_concolic/symcc_runtime", version = "0.14.1", default-features = false }
 symcc_libafl = { path = "./libafl_concolic/symcc_libafl", version = "0.14.1", default-features = false }
 libafl_derive = { path = "./libafl_derive", version = "0.14.1", default-features = false }
-libafl_frida = { path = "./libafl_frida", version = "0.14.1", default-features = false }
 libafl_intelpt = { path = "./libafl_intelpt", version = "0.14.1", default-features = false }
-libafl_libfuzzer = { path = "./libafl_libfuzzer", version = "0.14.1", default-features = false }
-libafl_nyx = { path = "./libafl_nyx", version = "0.14.1", default-features = false }
 libafl_targets = { path = "./libafl_targets", version = "0.14.1", default-features = false }
-libafl_tinyinst = { path = "./libafl_tinyinst", version = "0.14.1", default-features = false }
 libafl_qemu = { path = "./libafl_qemu", version = "0.14.1", default-features = false }
 libafl_qemu_build = { path = "./libafl_qemu/libafl_qemu_build", version = "0.14.1", default-features = false }
 libafl_qemu_sys = { path = "./libafl_qemu/libafl_qemu_sys", version = "0.14.1", default-features = false }
-libafl_sugar = { path = "./libafl_sugar", version = "0.14.1", default-features = false }
-dump_constraints = { path = "./libafl_concolic/test/dump_constraints", version = "0.14.1", default-features = false }
-runtime_test = { path = "./libafl_concolic/test/runtime_test", version = "0.14.1", default-features = false }
-build_and_test_fuzzers = { path = "./utils/build_and_test_fuzzers", version = "0.14.1", default-features = false }
-deexit = { path = "./utils/deexit", version = "0.14.1", default-features = false }
-drcov_utils = { path = "./utils/drcov_utils", version = "0.14.1", default-features = false }
-construct_automata = { path = "./utils/gramatron/construct_automata", version = "0.14.1", default-features = false }
-libafl_benches = { path = "./utils/libafl_benches", version = "0.14.1", default-features = false }
-libafl_jumper = { path = "./utils/libafl_jumper", version = "0.14.1", default-features = false }
 
 # External deps
 ahash = { version = "0.8.11", default-features = false }     # The hash function already used in hashbrown

diff --git a/fuzzers/baby/baby_fuzzer_custom_executor/Cargo.toml b/fuzzers/baby/baby_fuzzer_custom_executor/Cargo.toml
@@ -8,8 +8,9 @@ authors = [
 edition = "2021"
 
 [features]
-default = ["std"]
+default = ["std", "bloom_filter"]
 tui = ["libafl/tui_monitor"]
+bloom_filter = ["std"]
 std = []
 
 [profile.dev]

diff --git a/fuzzers/baby/baby_fuzzer_custom_executor/src/main.rs b/fuzzers/baby/baby_fuzzer_custom_executor/src/main.rs
@@ -133,7 +133,12 @@ pub fn main() {
     let scheduler = QueueScheduler::new();
 
     // A fuzzer with feedbacks and a corpus scheduler
+    #[cfg(not(feature = "bloom_filter"))]
     let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
+    #[cfg(feature = "bloom_filter")]
+    let mut fuzzer =
+        StdFuzzer::new_with_bloom_filter(scheduler, feedback, objective, 10_000_000, 0.001)
+            .unwrap();
 
     // Create the executor for an in-process function with just one observer
     let executor = CustomExecutor::new(&state);

diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml
@@ -291,6 +291,7 @@ document-features = { workspace = true, optional = true }
 clap = { workspace = true, optional = true }
 num_enum = { workspace = true, optional = true }
 libipt = { workspace = true, optional = true }
+bloomfilter = "3.0.1"
 
 [lints]
 workspace = true

diff --git a/libafl/src/executors/inprocess/mod.rs b/libafl/src/executors/inprocess/mod.rs
@@ -562,7 +562,7 @@ mod tests {
         let mut mgr = NopEventManager::new();
         let mut state =
             StdState::new(rand, corpus, solutions, &mut feedback, &mut objective).unwrap();
-        let mut fuzzer = StdFuzzer::<_, _, _>::new(sche, feedback, objective);
+        let mut fuzzer = StdFuzzer::new(sche, feedback, objective);
 
         let mut in_process_executor = InProcessExecutor::new(
             &mut harness,

diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs
@@ -2,7 +2,10 @@
 
 use alloc::{string::ToString, vec::Vec};
 use core::{fmt::Debug, time::Duration};
+#[cfg(feature = "std")]
+use std::hash::Hash;
 
+use bloomfilter::Bloom;
 use libafl_bolts::{current_time, tuples::MatchName};
 use serde::Serialize;
 
@@ -243,13 +246,14 @@ pub enum ExecuteInputResult {
 
 /// Your default fuzzer instance, for everyday use.
 #[derive(Debug)]
-pub struct StdFuzzer<CS, F, OF> {
+pub struct StdFuzzer<CS, F, OF, IF> {
     scheduler: CS,
     feedback: F,
     objective: OF,
+    input_filter: IF,
 }
 
-impl<CS, F, OF, S> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF>
+impl<CS, F, OF, S, IF> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF, IF>
 where
     S: HasCorpus,
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
@@ -265,7 +269,7 @@ where
     }
 }
 
-impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> {
+impl<CS, F, OF, IF> HasFeedback for StdFuzzer<CS, F, OF, IF> {
     type Feedback = F;
 
     fn feedback(&self) -> &Self::Feedback {
@@ -277,7 +281,7 @@ impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> {
     }
 }
 
-impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> {
+impl<CS, F, OF, IF> HasObjective for StdFuzzer<CS, F, OF, IF> {
     type Objective = OF;
 
     fn objective(&self) -> &OF {
@@ -289,8 +293,8 @@ impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> {
     }
 }
 
-impl<CS, EM, F, OF, OT, S> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, EM, F, OF, OT, S, IF> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S>
+    for StdFuzzer<CS, F, OF, IF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     EM: EventFirer<State = S>,
@@ -491,8 +495,8 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, OF, S, IF> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, OF, IF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: HasObservers + Executor<EM, Self, State = S>,
@@ -528,7 +532,43 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF>
+trait InputFilter<I> {
+    fn should_execute(&mut self, input: &I) -> bool;
+}
+
+/// A pseudo-filter that will execute each input.
+#[derive(Debug)]
+pub struct NopInputFilter;
+impl<I> InputFilter<I> for NopInputFilter {
+    fn should_execute(&mut self, _input: &I) -> bool {
+        true
+    }
+}
+
+/// A filter that probabilistically prevents duplicate execution of the same input based on a bloom filter.
+#[cfg(feature = "std")]
+#[derive(Debug)]
+pub struct BloomInputFilter<I> {
+    bloom: Bloom<I>,
+}
+
+#[cfg(feature = "std")]
+impl<I> BloomInputFilter<I> {
+    fn new(items_count: usize, fp_p: f64) -> Result<Self, Error> {
+        let bloom = Bloom::new_for_fp_rate(items_count, fp_p).map_err(Error::illegal_argument)?;
+        Ok(Self { bloom })
+    }
+}
+
+#[cfg(feature = "std")]
+impl<I: Hash> InputFilter<I> for BloomInputFilter<I> {
+    fn should_execute(&mut self, input: &I) -> bool {
+        !self.bloom.check_and_set(input)
+    }
+}
+
+impl<CS, E, EM, F, OF, S, IF> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, OF, IF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: HasObservers + Executor<EM, Self, State = S>,
@@ -545,6 +585,7 @@ where
         + UsesInput<Input = <S::Corpus as Corpus>::Input>,
     <S::Corpus as Corpus>::Input: Input,
     S::Solutions: Corpus<Input = <S::Corpus as Corpus>::Input>,
+    IF: InputFilter<<S::Corpus as Corpus>::Input>,
 {
     /// Process one input, adding to the respective corpora if needed and firing the right events
     #[inline]
@@ -556,7 +597,11 @@ where
         input: <S::Corpus as Corpus>::Input,
         send_events: bool,
     ) -> Result<(ExecuteInputResult, Option<CorpusId>), Error> {
-        self.evaluate_input_with_observers(state, executor, manager, input, send_events)
+        if self.input_filter.should_execute(&input) {
+            self.evaluate_input_with_observers(state, executor, manager, input, send_events)
+        } else {
+            Ok((ExecuteInputResult::None, None))
+        }
     }
     fn add_disabled_input(
         &mut self,
@@ -668,7 +713,7 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S, ST> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, OF, S, ST, IF> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, OF, IF>
 where
     CS: Scheduler<S::Input, S>,
     E: UsesState<State = S>,
@@ -792,16 +837,40 @@ where
     }
 }
 
-impl<CS, F, OF> StdFuzzer<CS, F, OF> {
+impl<CS, F, OF> StdFuzzer<CS, F, OF, NopInputFilter> {
     /// Create a new `StdFuzzer` with standard behavior.
     pub fn new(scheduler: CS, feedback: F, objective: OF) -> Self {
         Self {
             scheduler,
             feedback,
             objective,
+            input_filter: NopInputFilter,
         }
     }
 }
+impl<CS, F, OF, I> StdFuzzer<CS, F, OF, BloomInputFilter<I>> {
+    /// Create a new [`StdFuzzer`] that skips inputs its bloom filter reports as already seen, so duplicate inputs are (probabilistically) executed only once.
+    ///
+    /// This is achieved by hashing each input and using a bloom filter to differentiate inputs.
+    ///
+    /// Use this implementation if hashing each input is very fast compared to executing potential duplicate inputs.
+    pub fn new_with_bloom_filter(
+        scheduler: CS,
+        feedback: F,
+        objective: OF,
+        items_count: usize,
+        fp_p: f64,
+    ) -> Result<Self, Error> {
+        let input_filter = BloomInputFilter::new(items_count, fp_p)?;
+
+        Ok(Self {
+            scheduler,
+            feedback,
+            objective,
+            input_filter,
+        })
+    }
+}
 
 /// Structs with this trait will execute an input
 pub trait ExecutesInput<E, EM, I, S> {
@@ -815,8 +884,8 @@ pub trait ExecutesInput<E, EM, I, S> {
     ) -> Result<ExitKind, Error>;
 }
 
-impl<CS, E, EM, F, OF, S> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, OF, S, IF> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, OF, IF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: Executor<EM, Self, State = S> + HasObservers,

diff --git a/libafl/src/mutators/hash.rs b/libafl/src/mutators/hash.rs
@@ -0,0 +1,80 @@
+//! A wrapper around a [`Mutator`] that only reports [`MutationResult::Mutated`] if the input
+//! really changed, which it checks by hashing the input pre- and post-mutation
+use std::{borrow::Cow, hash::Hash};
+
+use libafl_bolts::{generic_hash_std, Error, Named};
+
+use super::{MutationResult, Mutator};
+
+/// A wrapper around a [`Mutator`] that only reports [`MutationResult::Mutated`] if the input
+/// really changed, which it checks by hashing the input pre- and post-mutation
+#[derive(Debug)]
+pub struct HashMutator<M> {
+    inner: M,
+    name: Cow<'static, str>,
+}
+
+impl<M> HashMutator<M>
+where
+    M: Named,
+{
+    /// Create a new [`HashMutator`]
+    pub fn new(inner: M) -> Self {
+        let name = Cow::Owned(format!("HashMutator<{}>", inner.name().clone()));
+        Self { inner, name }
+    }
+}
+
+impl<M, I, S> Mutator<I, S> for HashMutator<M>
+where
+    I: Hash,
+    M: Mutator<I, S>,
+{
+    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, Error> {
+        let before = generic_hash_std(input);
+        self.inner.mutate(state, input)?;
+        if before == generic_hash_std(input) {
+            Ok(MutationResult::Skipped)
+        } else {
+            Ok(MutationResult::Mutated)
+        }
+    }
+}
+
+impl<M> Named for HashMutator<M> {
+    fn name(&self) -> &Cow<'static, str> {
+        &self.name
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        inputs::BytesInput,
+        mutators::{BytesSetMutator, HashMutator, MutationResult, Mutator},
+        state::NopState,
+    };
+
+    #[test]
+    fn not_mutated() {
+        let mut state: NopState<BytesInput> = NopState::new();
+        let mut inner = BytesSetMutator::new();
+
+        let mut input = BytesInput::new(vec![0; 5]);
+
+        // nothing changed, yet `MutationResult::Mutated` was reported
+        assert_eq!(
+            MutationResult::Mutated,
+            inner.mutate(&mut state, &mut input).unwrap()
+        );
+        assert_eq!(BytesInput::new(vec![0; 5]), input);
+
+        // now it is correctly reported as `MutationResult::Skipped`
+        let mut hash_mutator = HashMutator::new(inner);
+        assert_eq!(
+            MutationResult::Skipped,
+            hash_mutator.mutate(&mut state, &mut input).unwrap()
+        );
+        assert_eq!(BytesInput::new(vec![0; 5]), input);
+    }
+}
diff --git a/libafl/src/mutators/mod.rs b/libafl/src/mutators/mod.rs
@@ -28,6 +28,11 @@ pub use mapping::*;
 pub mod tuneable;
 pub use tuneable::*;
 
+#[cfg(feature = "std")]
+pub mod hash;
+#[cfg(feature = "std")]
+pub use hash::*;
+
 #[cfg(feature = "unicode")]
 pub mod unicode;
 #[cfg(feature = "unicode")]
@@ -84,12 +89,14 @@ impl From<i32> for MutationId {
     }
 }
 
-/// The result of a mutation.
-/// If the mutation got skipped, the target
-/// will not be executed with the returned input.
+/// Result of the mutation.
+///
+/// [`MutationResult::Mutated`] does not necessarily mean that the input changed,
+/// just that the mutator did something. For slow targets, consider wrapping your
+/// mutator in a [`hash::HashMutator`].
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum MutationResult {
-    /// The [`Mutator`] mutated this `Input`.
+    /// The [`Mutator`] ran on this `Input`. The input may nevertheless be unchanged.
     Mutated,
     /// The [`Mutator`] did not mutate this `Input`. It was `Skipped`.
     Skipped,

diff --git a/libafl_bolts/src/shmem.rs b/libafl_bolts/src/shmem.rs
@@ -626,10 +626,7 @@ where
 pub mod unix_shmem {
     /// Mmap [`ShMem`] for Unix
     #[cfg(not(target_os = "android"))]
-    pub use default::MmapShMem;
-    /// Mmap [`ShMemProvider`] for Unix
-    #[cfg(not(target_os = "android"))]
-    pub use default::MmapShMemProvider;
+    pub use default::{MmapShMem, MmapShMemProvider, MAX_MMAP_FILENAME_LEN};
 
     #[cfg(doc)]
     use crate::shmem::{ShMem, ShMemProvider};
@@ -669,7 +666,8 @@ pub mod unix_shmem {
             Error,
         };
 
-        const MAX_MMAP_FILENAME_LEN: usize = 20;
+        /// The max number of bytes used when generating names for [`MmapShMem`]s.
+        pub const MAX_MMAP_FILENAME_LEN: usize = 20;
 
         /// Mmap-based The sharedmap impl for unix using [`shm_open`] and [`mmap`].
         /// Default on `MacOS` and `iOS`, where we need a central point to unmap