diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8193411f87..fde575922d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,6 +34,10 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
+      - name: Set the tag GC interval to 1 on linux
+        if: runner.os == 'Linux'
+        run: echo "MIRIFLAGS=-Zmiri-tag-gc=1" >> $GITHUB_ENV
+
       # We install gnu-tar because BSD tar is buggy on macOS builders of GHA.
       # See <https://github.com/actions/cache/issues/403>.
       - name: Install GNU tar
diff --git a/README.md b/README.md
index 8e96338a86..120ce82e60 100644
--- a/README.md
+++ b/README.md
@@ -323,6 +323,10 @@ environment variable. We first document the most relevant and most commonly used
   ensure alignment. (The standard library `align_to` method works fine in both modes;
   under symbolic alignment it only fills the middle slice when the allocation guarantees
   sufficient alignment.)
+* `-Zmiri-tag-gc=<blocks>` configures how often the pointer tag garbage collector runs. The default
+  is to search for and remove unreachable tags once every `10,000` basic blocks. Setting this to
+  `0` disables the garbage collector, which causes some programs to have explosive memory usage
+  and/or super-linear runtime.
 
 The remaining flags are for advanced use only, and more likely to
 change or be removed. Some of these are **unsound**, which means they can lead
diff --git a/ci.sh b/ci.sh
index d70feec66f..aa322e54a3 100755
--- a/ci.sh
+++ b/ci.sh
@@ -31,7 +31,7 @@ function run_tests {
         # optimizations up all the way).
         # Optimizations change diagnostics (mostly backtraces), so we don't check them
         #FIXME(#2155): we want to only run the pass and panic tests here, not the fail tests.
-        MIRIFLAGS="-O -Zmir-opt-level=4" MIRI_SKIP_UI_CHECKS=1 ./miri test -- tests/{pass,panic}
+        MIRIFLAGS="${MIRIFLAGS:-} -O -Zmir-opt-level=4" MIRI_SKIP_UI_CHECKS=1 ./miri test -- tests/{pass,panic}
     fi
 
     ## test-cargo-miri
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
index 2684ad7ff3..d9e91951a9 100644
--- a/src/bin/miri.rs
+++ b/src/bin/miri.rs
@@ -521,6 +521,12 @@ fn main() {
                 Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
             };
             miri_config.report_progress = Some(interval);
+        } else if let Some(param) = arg.strip_prefix("-Zmiri-tag-gc=") {
+            let interval = match param.parse::<u32>() {
+                Ok(i) => i,
+                Err(err) => show_error!("-Zmiri-tag-gc requires a `u32`: {}", err),
+            };
+            miri_config.gc_interval = interval;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
             miri_config.measureme_out = Some(param.to_string());
         } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") {
diff --git a/src/concurrency/thread.rs b/src/concurrency/thread.rs
index 38c30f44c7..19da0fc678 100644
--- a/src/concurrency/thread.rs
+++ b/src/concurrency/thread.rs
@@ -289,6 +289,10 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> {
         &mut self.threads[self.active_thread].stack
     }
 
+    pub fn iter(&self) -> impl Iterator<Item = &Thread<'mir, 'tcx>> {
+        self.threads.iter()
+    }
+
     pub fn all_stacks(
         &self,
     ) -> impl Iterator<Item = &[Frame<'mir, 'tcx, Provenance, FrameData<'tcx>>]> {
diff --git a/src/eval.rs b/src/eval.rs
index bf04e42713..8cdb2876f1 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -132,6 +132,8 @@ pub struct MiriConfig {
     /// The location of a shared object file to load when calling external functions
     /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory
     pub external_so_file: Option<PathBuf>,
+    /// Run a garbage collector for SbTags every N basic blocks.
+    pub gc_interval: u32,
 }
 
 impl Default for MiriConfig {
@@ -164,6 +166,7 @@ impl Default for MiriConfig {
             report_progress: None,
             retag_fields: false,
             external_so_file: None,
+            gc_interval: 10_000,
         }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index bd0cb80570..4fb6704165 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -62,6 +62,7 @@ mod operator;
 mod range_map;
 mod shims;
 mod stacked_borrows;
+mod tag_gc;
 
 // Establish a "crate-wide prelude": we often import `crate::*`.
@@ -110,6 +111,7 @@ pub use crate::range_map::RangeMap;
 pub use crate::stacked_borrows::{
     CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, Stack, Stacks,
 };
+pub use crate::tag_gc::EvalContextExt as _;
 
 /// Insert rustc arguments at the beginning of the argument list that Miri wants to be
 /// set per default, for maximal validation power.
diff --git a/src/machine.rs b/src/machine.rs
index b7624ac592..60fe2a91ad 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -394,6 +394,11 @@ pub struct Evaluator<'mir, 'tcx> {
     /// Handle of the optional shared object file for external functions.
     pub external_so_lib: Option<(libloading::Library, std::path::PathBuf)>,
+
+    /// Run a garbage collector for SbTags every N basic blocks.
+    pub(crate) gc_interval: u32,
+    /// The number of blocks that passed since the last SbTag GC pass.
+    pub(crate) since_gc: u32,
 }
 
 impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
@@ -469,6 +474,8 @@ impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
                     lib_file_path.clone(),
                 )
             }),
+            gc_interval: config.gc_interval,
+            since_gc: 0,
         }
     }
@@ -1008,6 +1015,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
     fn before_terminator(ecx: &mut InterpCx<'mir, 'tcx, Self>) -> InterpResult<'tcx> {
         ecx.machine.basic_block_count += 1u64; // a u64 that is only incremented by 1 will "never" overflow
+        ecx.machine.since_gc += 1;
         // Possibly report our progress.
         if let Some(report_progress) = ecx.machine.report_progress {
             if ecx.machine.basic_block_count % u64::from(report_progress) == 0 {
@@ -1016,6 +1024,16 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
                 });
             }
         }
+
+        // Search for SbTags to find all live pointers, then remove all other tags from borrow
+        // stacks.
+        // When debug assertions are enabled, run the GC as often as possible so that any cases
+        // where it mistakenly removes an important tag become visible.
+        if ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval {
+            ecx.machine.since_gc = 0;
+            ecx.garbage_collect_tags()?;
+        }
+
         // These are our preemption points.
         ecx.maybe_preempt_active_thread();
         Ok(())
diff --git a/src/shims/tls.rs b/src/shims/tls.rs
index f0f1d0e52b..343fa05712 100644
--- a/src/shims/tls.rs
+++ b/src/shims/tls.rs
@@ -233,6 +233,12 @@ impl<'tcx> TlsData<'tcx> {
             data.remove(&thread_id);
         }
     }
+
+    pub fn iter(&self, mut visitor: impl FnMut(&Scalar<Provenance>)) {
+        for scalar in self.keys.values().flat_map(|v| v.data.values()) {
+            visitor(scalar);
+        }
+    }
 }
 
 impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
diff --git a/src/stacked_borrows/mod.rs b/src/stacked_borrows/mod.rs
index 4f7914c5fb..9257628456 100644
--- a/src/stacked_borrows/mod.rs
+++ b/src/stacked_borrows/mod.rs
@@ -80,6 +80,8 @@ pub struct Stacks {
     history: AllocHistory,
     /// The set of tags that have been exposed inside this allocation.
     exposed_tags: FxHashSet<SbTag>,
+    /// Whether this memory has been modified since the last time the tag GC ran
+    modified_since_last_gc: bool,
 }
 
 /// Extra global state, available to the memory access hooks.
@@ -422,6 +424,7 @@ impl<'tcx> Stack {
             let item = self.get(idx).unwrap();
             Stack::item_popped(&item, global, dcx)?;
         }
+
         Ok(())
     }
 
@@ -496,6 +499,20 @@ impl<'tcx> Stack {
     }
 }
 // # Stacked Borrows Core End
 
+/// Integration with the SbTag garbage collector
+impl Stacks {
+    pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet<SbTag>) {
+        if self.modified_since_last_gc {
+            for stack in self.stacks.iter_mut_all() {
+                if stack.len() > 64 {
+                    stack.retain(live_tags);
+                }
+            }
+            self.modified_since_last_gc = false;
+        }
+    }
+}
+
 /// Map per-stack operations to higher-level per-location-range operations.
 impl<'tcx> Stacks {
     /// Creates a new stack with an initial tag. For diagnostic purposes, we also need to know
@@ -514,6 +531,7 @@ impl<'tcx> Stacks {
             stacks: RangeMap::new(size, stack),
             history: AllocHistory::new(id, item, current_span),
             exposed_tags: FxHashSet::default(),
+            modified_since_last_gc: false,
         }
     }
 
@@ -528,6 +546,7 @@ impl<'tcx> Stacks {
             &mut FxHashSet<SbTag>,
         ) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx> {
+        self.modified_since_last_gc = true;
         for (offset, stack) in self.stacks.iter_mut(range.start, range.size) {
             let mut dcx = dcx_builder.build(&mut self.history, offset);
             f(stack, &mut dcx, &mut self.exposed_tags)?;
diff --git a/src/stacked_borrows/stack.rs b/src/stacked_borrows/stack.rs
index 4a9a13d35b..494ea08b56 100644
--- a/src/stacked_borrows/stack.rs
+++ b/src/stacked_borrows/stack.rs
@@ -39,6 +39,61 @@ pub struct Stack {
     unique_range: Range<usize>,
 }
 
+impl Stack {
+    pub fn retain(&mut self, tags: &FxHashSet<SbTag>) {
+        let mut first_removed = None;
+
+        let mut read_idx = 1;
+        let mut write_idx = 1;
+        while read_idx < self.borrows.len() {
+            let left = self.borrows[read_idx - 1];
+            let this = self.borrows[read_idx];
+            let should_keep = match this.perm() {
+                // SharedReadWrite is the simplest case, if it's unreachable we can just remove it.
+                Permission::SharedReadWrite => tags.contains(&this.tag()),
+                // Only retain a Disabled tag if it is terminating a SharedReadWrite block.
+                Permission::Disabled => left.perm() == Permission::SharedReadWrite,
+                // Unique and SharedReadOnly can terminate a SharedReadWrite block, so only remove
+                // them if they are both unreachable and not directly after a SharedReadWrite.
+                Permission::Unique | Permission::SharedReadOnly =>
+                    left.perm() == Permission::SharedReadWrite || tags.contains(&this.tag()),
+            };
+
+            if should_keep {
+                if read_idx != write_idx {
+                    self.borrows[write_idx] = self.borrows[read_idx];
+                }
+                write_idx += 1;
+            } else if first_removed.is_none() {
+                first_removed = Some(read_idx);
+            }
+
+            read_idx += 1;
+        }
+        self.borrows.truncate(write_idx);
+
+        #[cfg(not(feature = "stack-cache"))]
+        drop(first_removed); // This is only needed for the stack-cache
+
+        #[cfg(feature = "stack-cache")]
+        if let Some(first_removed) = first_removed {
+            // Either end of unique_range may have shifted, all we really know is that we can't
+            // have introduced a new Unique.
+            if !self.unique_range.is_empty() {
+                self.unique_range = 0..self.len();
+            }
+
+            // Replace any Items which have been collected with the base item, a known-good value.
+            for i in 0..CACHE_LEN {
+                if self.cache.idx[i] >= first_removed {
+                    self.cache.items[i] = self.borrows[0];
+                    self.cache.idx[i] = 0;
+                }
+            }
+        }
+    }
+}
+
 /// A very small cache of searches of a borrow stack, mapping `Item`s to their position in said stack.
 ///
 /// It may seem like maintaining this cache is a waste for small stacks, but
@@ -105,14 +160,11 @@ impl<'tcx> Stack {
         // Check that the unique_range is a valid index into the borrow stack.
         // This asserts that the unique_range's start <= end.
-        let uniques = &self.borrows[self.unique_range.clone()];
+        let _uniques = &self.borrows[self.unique_range.clone()];
 
-        // Check that the start of the unique_range is precise.
-        if let Some(first_unique) = uniques.first() {
-            assert_eq!(first_unique.perm(), Permission::Unique);
-        }
-        // We cannot assert that the unique range is exact on the upper end.
-        // When we pop items within the unique range, setting the end of the range precisely
+        // We cannot assert that the unique range is precise.
+        // Both ends may shift around when `Stack::retain` is called. Additionally,
+        // when we pop items within the unique range, setting the end of the range precisely
         // requires doing a linear search of the borrow stack, which is exactly the kind of
         // operation that all this caching exists to avoid.
     }
diff --git a/src/tag_gc.rs b/src/tag_gc.rs
new file mode 100644
index 0000000000..0402e4d35d
--- /dev/null
+++ b/src/tag_gc.rs
@@ -0,0 +1,117 @@
+use crate::*;
+use rustc_data_structures::fx::FxHashSet;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
+    fn garbage_collect_tags(&mut self) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        // No reason to do anything at all if stacked borrows is off.
+        if this.machine.stacked_borrows.is_none() {
+            return Ok(());
+        }
+
+        let mut tags = FxHashSet::default();
+
+        for thread in this.machine.threads.iter() {
+            if let Some(Scalar::Ptr(
+                Pointer { provenance: Provenance::Concrete { sb, .. }, .. },
+                _,
+            )) = thread.panic_payload
+            {
+                tags.insert(sb);
+            }
+        }
+
+        self.find_tags_in_tls(&mut tags);
+        self.find_tags_in_memory(&mut tags);
+        self.find_tags_in_locals(&mut tags)?;
+
+        self.remove_unreachable_tags(tags);
+
+        Ok(())
+    }
+
+    fn find_tags_in_tls(&mut self, tags: &mut FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.machine.tls.iter(|scalar| {
+            if let Scalar::Ptr(Pointer { provenance: Provenance::Concrete { sb, .. }, .. }, _) =
+                scalar
+            {
+                tags.insert(*sb);
+            }
+        });
+    }
+
+    fn find_tags_in_memory(&mut self, tags: &mut FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.memory.alloc_map().iter(|it| {
+            for (_id, (_kind, alloc)) in it {
+                for (_size, prov) in alloc.provenance().iter() {
+                    if let Provenance::Concrete { sb, .. } = prov {
+                        tags.insert(*sb);
+                    }
+                }
+            }
+        });
+    }
+
+    fn find_tags_in_locals(&mut self, tags: &mut FxHashSet<SbTag>) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        for frame in this.machine.threads.all_stacks().flatten() {
+            // Handle the return place of each frame
+            if let Ok(return_place) = frame.return_place.try_as_mplace() {
+                if let Some(Provenance::Concrete { sb, .. }) = return_place.ptr.provenance {
+                    tags.insert(sb);
+                }
+            }
+
+            for local in frame.locals.iter() {
+                let LocalValue::Live(value) = local.value else {
+                    continue;
+                };
+                match value {
+                    Operand::Immediate(Immediate::Scalar(Scalar::Ptr(ptr, _))) =>
+                        if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                            tags.insert(sb);
+                        },
+                    Operand::Immediate(Immediate::ScalarPair(s1, s2)) => {
+                        if let Scalar::Ptr(ptr, _) = s1 {
+                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                                tags.insert(sb);
+                            }
+                        }
+                        if let Scalar::Ptr(ptr, _) = s2 {
+                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
+                                tags.insert(sb);
+                            }
+                        }
+                    }
+                    Operand::Indirect(MemPlace { ptr, .. }) => {
+                        if let Some(Provenance::Concrete { sb, .. }) = ptr.provenance {
+                            tags.insert(sb);
+                        }
+                    }
+                    Operand::Immediate(Immediate::Uninit)
+                    | Operand::Immediate(Immediate::Scalar(Scalar::Int(_))) => {}
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn remove_unreachable_tags(&mut self, tags: FxHashSet<SbTag>) {
+        let this = self.eval_context_mut();
+        this.memory.alloc_map().iter(|it| {
+            for (_id, (_kind, alloc)) in it {
+                alloc
+                    .extra
+                    .stacked_borrows
+                    .as_ref()
+                    .unwrap()
+                    .borrow_mut()
+                    .remove_unreachable_tags(&tags);
+            }
+        });
+    }
+}
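
---

A note on the compaction used by `Stack::retain` in `src/stacked_borrows/stack.rs` above: it is a two-pointer in-place filter where element 0 (the base item) is always kept and each element's fate may depend on its original left neighbor. Because `write_idx <= read_idx` always holds, slots at or past `write_idx` have not yet been overwritten, so `self.borrows[read_idx - 1]` still holds the *original* left neighbor even after removals have started. Below is a minimal, self-contained sketch of the same pattern over plain integers; `retain_with_left_neighbor` and the `keep` predicate are hypothetical stand-ins, not Miri's real types or API.

```rust
// Hypothetical illustration of the read_idx/write_idx compaction in `Stack::retain`.
// Slots at or past `write_idx` are never overwritten before they are read, so
// `v[read_idx - 1]` always yields the *original* left neighbor.
fn retain_with_left_neighbor(v: &mut Vec<u32>, mut keep: impl FnMut(u32, u32) -> bool) {
    let mut read_idx = 1; // index 0 (the "base item") is unconditionally kept
    let mut write_idx = 1;
    while read_idx < v.len() {
        let left = v[read_idx - 1]; // original left neighbor
        let this = v[read_idx];
        if keep(left, this) {
            if read_idx != write_idx {
                v[write_idx] = v[read_idx]; // shift the survivor down over removed slots
            }
            write_idx += 1;
        }
        read_idx += 1;
    }
    v.truncate(write_idx); // drop the now-unused tail
}

fn main() {
    let mut v = vec![0, 3, 1, 4, 1, 5];
    // Keep an element only if it exceeds its original left neighbor.
    retain_with_left_neighbor(&mut v, |left, this| this > left);
    assert_eq!(v, vec![0, 3, 4, 5]);
}
```

The same single-pass structure lets the real code decide each item's fate from `(left.perm(), this.perm(), reachability)` without ever shifting the stack more than once, and `first_removed` records the earliest index whose cached position may now be stale. Per the README hunk above, the whole machinery can be tuned or stress-tested at runtime, e.g. `MIRIFLAGS=-Zmiri-tag-gc=1` (as the CI change does) or disabled with `MIRIFLAGS=-Zmiri-tag-gc=0`.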