Auto merge of #2479 - saethlin:tag-gc, r=oli-obk

Implement a garbage collector for tags The general approach here is to scan TLS, all locals, and the main memory map for all provenance, accumulating a `HashSet` of all pointer tags which are stored anywhere (we also have a special case for panic payloads). Then we iterate over every borrow stack and remove tags which are not in said `HashSet`, or which could be terminating a SRW block. Runtime of benchmarks decreases by between 17% and 81%. GC off: ``` Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/backtraces/Cargo.toml Time (mean ± σ): 7.080 s ± 0.249 s [User: 6.870 s, System: 0.202 s] Range (min … max): 6.933 s … 7.521 s 5 runs Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet PC without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options. Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/mse/Cargo.toml Time (mean ± σ): 1.875 s ± 0.031 s [User: 1.630 s, System: 0.245 s] Range (min … max): 1.825 s … 1.910 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/serde1/Cargo.toml Time (mean ± σ): 2.785 s ± 0.075 s [User: 2.536 s, System: 0.168 s] Range (min … max): 2.698 s … 2.851 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/serde2/Cargo.toml Time (mean ± σ): 6.267 s ± 0.066 s [User: 6.072 s, System: 0.190 s] Range (min … max): 6.152 s … 6.314 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/slice-get-unchecked/Cargo.toml Time (mean ± σ): 4.733 s ± 0.080 s [User: 4.177 s, System: 0.513 s] Range (min … max): 4.681 s … 4.874 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/unicode/Cargo.toml Time (mean ± σ): 3.770 s ± 0.034 s [User: 3.549 s, System: 0.211 s] Range (min … max): 3.724 s … 3.819 s 5 runs ``` GC on: ``` Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/backtraces/Cargo.toml Time (mean ± σ): 5.886 s ± 0.054 s [User: 5.696 s, System: 0.182 s] Range (min … max): 5.799 s … 5.937 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/mse/Cargo.toml Time (mean ± σ): 936.4 ms ± 7.0 ms [User: 815.4 ms, System: 119.6 ms] Range (min … max): 925.7 ms … 945.0 ms 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/serde1/Cargo.toml Time (mean ± σ): 2.126 s ± 0.022 s [User: 1.979 s, System: 0.146 s] Range (min … max): 2.089 s … 2.143 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/serde2/Cargo.toml Time (mean ± σ): 4.242 s ± 0.066 s [User: 4.051 s, System: 0.160 s] Range (min … max): 4.196 s … 4.357 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/slice-get-unchecked/Cargo.toml Time (mean ± σ): 907.4 ms ± 2.4 ms [User: 788.6 ms, System: 118.2 ms] Range (min … max): 903.5 ms … 909.4 ms 5 runs Benchmark 1: cargo +miri miri run --manifest-path /home/ben/miri/bench-cargo-miri/unicode/Cargo.toml Time (mean ± σ): 1.821 s ± 0.011 s [User: 1.687 s, System: 0.133 s] Range (min … max): 1.802 s … 1.831 s 5 runs ``` But much more importantly for me this drops the peak memory usage of the first 1 minute of running `regex`'s tests from 103 GB to 1.7 GB. Thanks to `@oli-obk` for suggesting a while ago that this was possible and `@darksonn` for reminding me that we can just search through memory to find Provenance to locate pointers. Fixes #1367
rust-lang · Sep 13, 2022 · a00fa96 · a00fa96
2 parents 3886a63 + f59605c
commit a00fa96
Show file tree

Hide file tree

Showing 12 changed files with 243 additions and 8 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -34,6 +34,10 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
+      - name: Set the tag GC interval to 1 on linux
+        if: runner.os == 'macOS'
+        run: echo "MIRIFLAGS=-Zmiri-tag-gc=1" >> $GITHUB_ENV
+
       # We install gnu-tar because BSD tar is buggy on macOS builders of GHA.
       # See <https://github.com/actions/cache/issues/403>.
       - name: Install GNU tar

diff --git a/README.md b/README.md
@@ -323,6 +323,10 @@ environment variable. We first document the most relevant and most commonly used
   ensure alignment.  (The standard library `align_to` method works fine in both modes; under
   symbolic alignment it only fills the middle slice when the allocation guarantees sufficient
   alignment.)
+* `-Zmiri-tag-gc=<blocks>` configures how often the pointer tag garbage collector runs. The default
+  is to search for and remove unreachable tags once every `10,000` basic blocks. Setting this to
+  `0` disables the garbage collector, which causes some programs to have explosive memory usage
+  and/or super-linear runtime.
 
 The remaining flags are for advanced use only, and more likely to change or be removed.
 Some of these are **unsound**, which means they can lead

diff --git a/ci.sh b/ci.sh
@@ -31,7 +31,7 @@ function run_tests {
     # optimizations up all the way).
     # Optimizations change diagnostics (mostly backtraces), so we don't check them
     #FIXME(#2155): we want to only run the pass and panic tests here, not the fail tests.
-    MIRIFLAGS="-O -Zmir-opt-level=4" MIRI_SKIP_UI_CHECKS=1 ./miri test -- tests/{pass,panic}
+    MIRIFLAGS="${MIRIFLAGS:-} -O -Zmir-opt-level=4" MIRI_SKIP_UI_CHECKS=1 ./miri test -- tests/{pass,panic}
   fi
 
   ## test-cargo-miri

diff --git a/src/bin/miri.rs b/src/bin/miri.rs
@@ -521,6 +521,12 @@ fn main() {
                 Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
             };
             miri_config.report_progress = Some(interval);
+        } else if let Some(param) = arg.strip_prefix("-Zmiri-tag-gc=") {
+            let interval = match param.parse::<u32>() {
+                Ok(i) => i,
+                Err(err) => show_error!("-Zmiri-tag-gc requires a `u32`: {}", err),
+            };
+            miri_config.gc_interval = interval;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
             miri_config.measureme_out = Some(param.to_string());
         } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") {

diff --git a/src/concurrency/thread.rs b/src/concurrency/thread.rs
@@ -289,6 +289,10 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> {
         &mut self.threads[self.active_thread].stack
     }
 
+    pub fn iter(&self) -> impl Iterator<Item = &Thread<'mir, 'tcx>> {
+        self.threads.iter()
+    }
+
     pub fn all_stacks(
         &self,
     ) -> impl Iterator<Item = &[Frame<'mir, 'tcx, Provenance, FrameData<'tcx>>]> {

diff --git a/src/eval.rs b/src/eval.rs
@@ -132,6 +132,8 @@ pub struct MiriConfig {
     /// The location of a shared object file to load when calling external functions
     /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory
     pub external_so_file: Option<PathBuf>,
+    /// Run a garbage collector for SbTags every N basic blocks.
+    pub gc_interval: u32,
 }
 
 impl Default for MiriConfig {
@@ -164,6 +166,7 @@ impl Default for MiriConfig {
             report_progress: None,
             retag_fields: false,
             external_so_file: None,
+            gc_interval: 10_000,
         }
     }
 }

diff --git a/src/lib.rs b/src/lib.rs
@@ -62,6 +62,7 @@ mod operator;
 mod range_map;
 mod shims;
 mod stacked_borrows;
+mod tag_gc;
 
 // Establish a "crate-wide prelude": we often import `crate::*`.
 
@@ -110,6 +111,7 @@ pub use crate::range_map::RangeMap;
 pub use crate::stacked_borrows::{
     CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, Stack, Stacks,
 };
+pub use crate::tag_gc::EvalContextExt as _;
 
 /// Insert rustc arguments at the beginning of the argument list that Miri wants to be
 /// set per default, for maximal validation power.

diff --git a/src/machine.rs b/src/machine.rs
@@ -394,6 +394,11 @@ pub struct Evaluator<'mir, 'tcx> {
 
     /// Handle of the optional shared object file for external functions.
     pub external_so_lib: Option<(libloading::Library, std::path::PathBuf)>,
+
+    /// Run a garbage collector for SbTags every N basic blocks.
+    pub(crate) gc_interval: u32,
+    /// The number of blocks that passed since the last SbTag GC pass.
+    pub(crate) since_gc: u32,
 }
 
 impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
@@ -469,6 +474,8 @@ impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
                     lib_file_path.clone(),
                 )
             }),
+            gc_interval: config.gc_interval,
+            since_gc: 0,
         }
     }
 
@@ -1008,6 +1015,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
 
     fn before_terminator(ecx: &mut InterpCx<'mir, 'tcx, Self>) -> InterpResult<'tcx> {
         ecx.machine.basic_block_count += 1u64; // a u64 that is only incremented by 1 will "never" overflow
+        ecx.machine.since_gc += 1;
         // Possibly report our progress.
         if let Some(report_progress) = ecx.machine.report_progress {
             if ecx.machine.basic_block_count % u64::from(report_progress) == 0 {
@@ -1016,6 +1024,16 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
                 });
             }
         }
+
+        // Search for SbTags to find all live pointers, then remove all other tags from borrow
+        // stacks.
+        // When debug assertions are enabled, run the GC as often as possible so that any cases
+        // where it mistakenly removes an important tag become visible.
+        if ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval {
+            ecx.machine.since_gc = 0;
+            ecx.garbage_collect_tags()?;
+        }
+
         // These are our preemption points.
         ecx.maybe_preempt_active_thread();
         Ok(())

diff --git a/src/shims/tls.rs b/src/shims/tls.rs
@@ -233,6 +233,12 @@ impl<'tcx> TlsData<'tcx> {
             data.remove(&thread_id);
         }
     }
+
+    pub fn iter(&self, mut visitor: impl FnMut(&Scalar<Provenance>)) {
+        for scalar in self.keys.values().flat_map(|v| v.data.values()) {
+            visitor(scalar);
+        }
+    }
 }
 
 impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}

diff --git a/src/stacked_borrows/mod.rs b/src/stacked_borrows/mod.rs
@@ -80,6 +80,8 @@ pub struct Stacks {
     history: AllocHistory,
     /// The set of tags that have been exposed inside this allocation.
     exposed_tags: FxHashSet<SbTag>,
+    /// Whether this memory has been modified since the last time the tag GC ran
+    modified_since_last_gc: bool,
 }
 
 /// Extra global state, available to the memory access hooks.
@@ -422,6 +424,7 @@ impl<'tcx> Stack {
             let item = self.get(idx).unwrap();
             Stack::item_popped(&item, global, dcx)?;
         }
+
         Ok(())
     }
 
@@ -496,6 +499,20 @@ impl<'tcx> Stack {
 }
 // # Stacked Borrows Core End
 
+/// Integration with the SbTag garbage collector
+impl Stacks {
+    pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet<SbTag>) {
+        if self.modified_since_last_gc {
+            for stack in self.stacks.iter_mut_all() {
+                if stack.len() > 64 {
+                    stack.retain(live_tags);
+                }
+            }
+            self.modified_since_last_gc = false;
+        }
+    }
+}
+
 /// Map per-stack operations to higher-level per-location-range operations.
 impl<'tcx> Stacks {
     /// Creates a new stack with an initial tag. For diagnostic purposes, we also need to know
@@ -514,6 +531,7 @@ impl<'tcx> Stacks {
             stacks: RangeMap::new(size, stack),
             history: AllocHistory::new(id, item, current_span),
             exposed_tags: FxHashSet::default(),
+            modified_since_last_gc: false,
         }
     }
 
@@ -528,6 +546,7 @@ impl<'tcx> Stacks {
             &mut FxHashSet<SbTag>,
         ) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx> {
+        self.modified_since_last_gc = true;
         for (offset, stack) in self.stacks.iter_mut(range.start, range.size) {
             let mut dcx = dcx_builder.build(&mut self.history, offset);
             f(stack, &mut dcx, &mut self.exposed_tags)?;

diff --git a/src/stacked_borrows/stack.rs b/src/stacked_borrows/stack.rs
@@ -39,6 +39,61 @@ pub struct Stack {
     unique_range: Range<usize>,
 }
 
+impl Stack {
+    pub fn retain(&mut self, tags: &FxHashSet<SbTag>) {
+        let mut first_removed = None;
+
+        let mut read_idx = 1;
+        let mut write_idx = 1;
+        while read_idx < self.borrows.len() {
+            let left = self.borrows[read_idx - 1];
+            let this = self.borrows[read_idx];
+            let should_keep = match this.perm() {
+                // SharedReadWrite is the simplest case, if it's unreachable we can just remove it.
+                Permission::SharedReadWrite => tags.contains(&this.tag()),
+                // Only retain a Disabled tag if it is terminating a SharedReadWrite block.
+                Permission::Disabled => left.perm() == Permission::SharedReadWrite,
+                // Unique and SharedReadOnly can terminate a SharedReadWrite block, so only remove
+                // them if they are both unreachable and not directly after a SharedReadWrite.
+                Permission::Unique | Permission::SharedReadOnly =>
+                    left.perm() == Permission::SharedReadWrite || tags.contains(&this.tag()),
+            };
+
+            if should_keep {
+                if read_idx != write_idx {
+                    self.borrows[write_idx] = self.borrows[read_idx];
+                }
+                write_idx += 1;
+            } else if first_removed.is_none() {
+                first_removed = Some(read_idx);
+            }
+
+            read_idx += 1;
+        }
+        self.borrows.truncate(write_idx);
+
+        #[cfg(not(feature = "stack-cache"))]
+        drop(first_removed); // This is only needed for the stack-cache
+
+        #[cfg(feature = "stack-cache")]
+        if let Some(first_removed) = first_removed {
+            // Either end of unique_range may have shifted, all we really know is that we can't
+            // have introduced a new Unique.
+            if !self.unique_range.is_empty() {
+                self.unique_range = 0..self.len();
+            }
+
+            // Replace any Items which have been collected with the base item, a known-good value.
+            for i in 0..CACHE_LEN {
+                if self.cache.idx[i] >= first_removed {
+                    self.cache.items[i] = self.borrows[0];
+                    self.cache.idx[i] = 0;
+                }
+            }
+        }
+    }
+}
+
 /// A very small cache of searches of a borrow stack, mapping `Item`s to their position in said stack.
 ///
 /// It may seem like maintaining this cache is a waste for small stacks, but
@@ -105,14 +160,11 @@ impl<'tcx> Stack {
 
         // Check that the unique_range is a valid index into the borrow stack.
         // This asserts that the unique_range's start <= end.
-        let uniques = &self.borrows[self.unique_range.clone()];
+        let _uniques = &self.borrows[self.unique_range.clone()];
 
-        // Check that the start of the unique_range is precise.
-        if let Some(first_unique) = uniques.first() {
-            assert_eq!(first_unique.perm(), Permission::Unique);
-        }
-        // We cannot assert that the unique range is exact on the upper end.
-        // When we pop items within the unique range, setting the end of the range precisely
+        // We cannot assert that the unique range is precise.
+        // Both ends may shift around when `Stack::retain` is called. Additionally,
+        // when we pop items within the unique range, setting the end of the range precisely
         // requires doing a linear search of the borrow stack, which is exactly the kind of
         // operation that all this caching exists to avoid.
     }

diff --git a/src/tag_gc.rs b/src/tag_gc.rs
@@ -0,0 +1,117 @@
+use crate::*;
+use rustc_data_structures::fx::FxHashSet;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
+    fn garbage_collect_tags(&mut self) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        // No reason to do anything at all if stacked borrows is off.
+        if this.machine.stacked_borrows.is_none() {
+            return Ok(());
+        }
+
+        let mut tags = FxHashSet::default();
+
+        for thread in this.machine.threads.iter() {
+            if let Some(Scalar::Ptr(
+                Pointer { provenance: Provenance::Concrete { sb, .. }, .. },
+                _,
+            )) = thread.panic_payload
+            {
+                tags.insert(sb);
+            }
+        }
+
+        self.find_tags_in_tls(&mut tags);
+        self.find_tags_in_memory(&mut tags);
+        self.find_tags_in_locals(&mut tags)?;
+
+        self.remove_unreachable_tags(tags);
+
+        Ok(())
+    }
+
+    fn find_tags_in_tls(&mut self, tags: &mut FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.machine.tls.iter(|scalar| {
+            if let Scalar::Ptr(Pointer { provenance: Provenance::Concrete { sb, .. }, .. }, _) =
+                scalar
+            {
+                tags.insert(*sb);
+            }
+        });
+    }
+
+    fn find_tags_in_memory(&mut self, tags: &mut FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.memory.alloc_map().iter(|it| {
+            for (_id, (_kind, alloc)) in it {
+                for (_size, prov) in alloc.provenance().iter() {
+                    if let Provenance::Concrete { sb, .. } = prov {
+                        tags.insert(*sb);
+                    }
+                }
+            }
+        });
+    }
+
+    fn find_tags_in_locals(&mut self, tags: &mut FxHashSet<SbTag>) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        for frame in this.machine.threads.all_stacks().flatten() {
+            // Handle the return place of each frame
+            if let Ok(return_place) = frame.return_place.try_as_mplace() {
+                if let Some(Provenance::Concrete { sb, .. }) = return_place.ptr.provenance {
+                    tags.insert(sb);
+                }
+            }
+
+            for local in frame.locals.iter() {
+                let LocalValue::Live(value) = local.value else {
+                continue;
+            };
+                match value {
+                    Operand::Immediate(Immediate::Scalar(Scalar::Ptr(ptr, _))) =>
+                        if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                            tags.insert(sb);
+                        },
+                    Operand::Immediate(Immediate::ScalarPair(s1, s2)) => {
+                        if let Scalar::Ptr(ptr, _) = s1 {
+                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                                tags.insert(sb);
+                            }
+                        }
+                        if let Scalar::Ptr(ptr, _) = s2 {
+                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                                tags.insert(sb);
+                            }
+                        }
+                    }
+                    Operand::Indirect(MemPlace { ptr, .. }) => {
+                        if let Some(Provenance::Concrete { sb, .. }) = ptr.provenance {
+                            tags.insert(sb);
+                        }
+                    }
+                    Operand::Immediate(Immediate::Uninit)
+                    | Operand::Immediate(Immediate::Scalar(Scalar::Int(_))) => {}
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn remove_unreachable_tags(&mut self, tags: FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.memory.alloc_map().iter(|it| {
+            for (_id, (_kind, alloc)) in it {
+                alloc
+                    .extra
+                    .stacked_borrows
+                    .as_ref()
+                    .unwrap()
+                    .borrow_mut()
+                    .remove_unreachable_tags(&tags);
+            }
+        });
+    }
+}