From b0d297d40191decc0e951046633da608e16dc50c Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 24 Jul 2024 16:57:37 +0800 Subject: [PATCH 01/28] WIP: enumerate objects and regions --- src/mmtk.rs | 18 +++- src/policy/copyspace.rs | 7 +- src/policy/immix/block.rs | 7 ++ src/policy/immix/immixspace.rs | 7 +- src/policy/immortalspace.rs | 5 + src/policy/largeobjectspace.rs | 5 + src/policy/lockfreeimmortalspace.rs | 5 + src/policy/markcompactspace.rs | 5 + src/policy/marksweepspace/malloc_ms/global.rs | 5 + src/policy/marksweepspace/native_ms/block.rs | 7 ++ src/policy/marksweepspace/native_ms/global.rs | 5 + src/policy/space.rs | 13 +++ src/util/linear_scan.rs | 4 + src/util/mod.rs | 1 + src/util/object_enum.rs | 99 +++++++++++++++++++ src/util/treadmill.rs | 13 +++ 16 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 src/util/object_enum.rs diff --git a/src/mmtk.rs b/src/mmtk.rs index eea56e7e45..42f7699c63 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -13,7 +13,6 @@ use crate::util::heap::gc_trigger::GCTrigger; use crate::util::heap::layout::vm_layout::VMLayout; use crate::util::heap::layout::{self, Mmapper, VMMap}; use crate::util::heap::HeapMeta; -use crate::util::opaque_pointer::*; use crate::util::options::Options; use crate::util::reference_processor::ReferenceProcessors; #[cfg(feature = "sanity")] @@ -21,6 +20,7 @@ use crate::util::sanity::sanity_checker::SanityChecker; #[cfg(feature = "extreme_assertions")] use crate::util::slot_logger::SlotLogger; use crate::util::statistics::stats::Stats; +use crate::util::{opaque_pointer::*, ObjectReference}; use crate::vm::ReferenceGlue; use crate::vm::VMBinding; use std::cell::UnsafeCell; @@ -471,4 +471,20 @@ impl MMTK { pub fn get_options(&self) -> &Options { &self.options } + + /// Enumerate objects in all spaces in this MMTK instance. + /// The call-back function `f` is called for every object. 
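+ /// For example, a binding may collect every object into a vector (a sketch, assuming the
+ /// `vo_bit` feature is enabled and no mutator is allocating during the enumeration):
+ /// ```ignore
+ /// let mut objects = vec![];
+ /// mmtk.enumerate_objects(|object| objects.push(object));
+ /// ```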
+ #[cfg(feature = "vo_bit")] + pub fn enumerate_objects(&self, f: F) + where + F: FnMut(ObjectReference), + { + use crate::util::object_enum; + + let mut enumerator = object_enum::ClosureObjectEnumerator::new(f); + let plan = self.get_plan(); + plan.for_each_space(&mut |space| { + space.enumerate_objects(&mut enumerator); + }) + } } diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index bf73255033..80f4962584 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -6,9 +6,10 @@ use crate::policy::sft::SFT; use crate::policy::space::{CommonSpace, Space}; use crate::scheduler::GCWorker; use crate::util::alloc::allocator::AllocatorContext; -use crate::util::copy::*; +use crate::util::{copy::*, object_enum}; use crate::util::heap::{MonotonePageResource, PageResource}; use crate::util::metadata::{extract_side_metadata, MetadataSpec}; +use crate::util::object_enum::ObjectEnumerator; use crate::util::object_forwarding; use crate::util::{Address, ObjectReference}; use crate::vm::*; @@ -133,6 +134,10 @@ impl Space for CopySpace { fn set_copy_for_sft_trace(&mut self, semantics: Option) { self.common.copy = semantics; } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); + } } impl crate::policy::gc_work::PolicyTraceObject for CopySpace { diff --git a/src/policy/immix/block.rs b/src/policy/immix/block.rs index 8c772924c8..85ed76ddef 100644 --- a/src/policy/immix/block.rs +++ b/src/policy/immix/block.rs @@ -10,6 +10,7 @@ use crate::util::metadata::side_metadata::{MetadataByteArrayRef, SideMetadataSpe use crate::util::metadata::vo_bit; #[cfg(feature = "object_pinning")] use crate::util::metadata::MetadataSpec; +use crate::util::object_enum::BlockMayHaveObjects; use crate::util::Address; use crate::vm::*; use std::sync::atomic::Ordering; @@ -86,6 +87,12 @@ impl Region for Block { } } +impl BlockMayHaveObjects for Block { + fn may_have_objects(&self) -> bool { + self.get_state() != BlockState::Unallocated + } +} + impl Block { /// Log pages in block pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize; diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index 547ac6d794..ccf30b29bd 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -9,7 +9,6 @@ use crate::policy::sft_map::SFTMap; use crate::policy::space::{CommonSpace, Space}; use crate::util::alloc::allocator::AllocatorContext; use crate::util::constants::LOG_BYTES_IN_PAGE; -use crate::util::copy::*; use crate::util::heap::chunk_map::*; use crate::util::heap::BlockPageResource; use crate::util::heap::PageResource; @@ -18,7 +17,9 @@ use crate::util::metadata::side_metadata::SideMetadataSpec; #[cfg(feature = "vo_bit")] use crate::util::metadata::vo_bit; use crate::util::metadata::{self, MetadataSpec}; +use crate::util::object_enum::ObjectEnumerator; use crate::util::object_forwarding; +use crate::util::{copy::*, object_enum}; use crate::util::{Address, ObjectReference}; use crate::vm::*; use crate::{ @@ -189,6 +190,10 @@ impl Space for ImmixSpace { fn set_copy_for_sft_trace(&mut self, _semantics: Option) { panic!("We do not use SFT to trace objects for Immix. 
set_copy_context() cannot be used.") } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); + } } impl crate::policy::gc_work::PolicyTraceObject for ImmixSpace { diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index 6d3e63922d..2ebb14e4d4 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -6,6 +6,7 @@ use crate::util::address::Address; use crate::util::heap::{MonotonePageResource, PageResource}; use crate::util::metadata::mark_bit::MarkState; +use crate::util::object_enum::{self, ObjectEnumerator}; use crate::util::{metadata, ObjectReference}; use crate::plan::{ObjectQueue, VectorObjectQueue}; @@ -112,6 +113,10 @@ impl Space for ImmortalSpace { fn release_multiple_pages(&mut self, _start: Address) { panic!("immortalspace only releases pages enmasse") } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); + } } use crate::scheduler::GCWorker; diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index b64a5371c6..ac499b8901 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -8,6 +8,7 @@ use crate::policy::space::{CommonSpace, Space}; use crate::util::constants::BYTES_IN_PAGE; use crate::util::heap::{FreeListPageResource, PageResource}; use crate::util::metadata; +use crate::util::object_enum::ObjectEnumerator; use crate::util::opaque_pointer::*; use crate::util::treadmill::TreadMill; use crate::util::{Address, ObjectReference}; @@ -175,6 +176,10 @@ impl Space for LargeObjectSpace { fn release_multiple_pages(&mut self, start: Address) { self.pr.release_pages(start); } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + self.treadmill.enumerate_objects_coarse(enumerator); + } } use crate::scheduler::GCWorker; diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index 858d07fd5f..179236cf9b 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -16,6 +16,7 @@ use crate::util::heap::VMRequest; use crate::util::memory::MmapStrategy; use crate::util::metadata::side_metadata::SideMetadataContext; use crate::util::metadata::side_metadata::SideMetadataSanity; +use crate::util::object_enum::ObjectEnumerator; use crate::util::opaque_pointer::*; use crate::util::ObjectReference; use crate::vm::VMBinding; @@ -166,6 +167,10 @@ impl Space for LockFreeImmortalSpace { side_metadata_sanity_checker .verify_metadata_context(std::any::type_name::(), &self.metadata) } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + enumerator.visit_address_range(self.start..(self.start + self.total_bytes)); + } } use crate::plan::{ObjectQueue, VectorObjectQueue}; diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index 9db05e56aa..bc0f5659c2 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -11,6 +11,7 @@ use crate::util::constants::LOG_BYTES_IN_WORD; use crate::util::copy::CopySemantics; use crate::util::heap::{MonotonePageResource, PageResource}; use crate::util::metadata::{extract_side_metadata, vo_bit}; +use crate::util::object_enum::{self, ObjectEnumerator}; use crate::util::{Address, ObjectReference}; use crate::{vm::*, ObjectQueue}; use atomic::Ordering; @@ -131,6 +132,10 @@ impl Space for MarkCompactSpace { fn release_multiple_pages(&mut 
self, _start: Address) { panic!("markcompactspace only releases pages enmasse") } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); + } } impl crate::policy::gc_work::PolicyTraceObject for MarkCompactSpace { diff --git a/src/policy/marksweepspace/malloc_ms/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs index d95aa8437b..dbe386db5c 100644 --- a/src/policy/marksweepspace/malloc_ms/global.rs +++ b/src/policy/marksweepspace/malloc_ms/global.rs @@ -15,6 +15,7 @@ use crate::util::metadata::side_metadata::{ SideMetadataContext, SideMetadataSanity, SideMetadataSpec, }; use crate::util::metadata::MetadataSpec; +use crate::util::object_enum::ObjectEnumerator; use crate::util::opaque_pointer::*; use crate::util::Address; use crate::util::ObjectReference; @@ -229,6 +230,10 @@ impl Space for MallocSpace { side_metadata_sanity_checker .verify_metadata_context(std::any::type_name::(), &self.metadata) } + + fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) { + unimplemented!() + } } use crate::scheduler::GCWorker; diff --git a/src/policy/marksweepspace/native_ms/block.rs b/src/policy/marksweepspace/native_ms/block.rs index 6727430e9e..a150d974b5 100644 --- a/src/policy/marksweepspace/native_ms/block.rs +++ b/src/policy/marksweepspace/native_ms/block.rs @@ -7,6 +7,7 @@ use super::MarkSweepSpace; use crate::util::constants::LOG_BYTES_IN_PAGE; use crate::util::heap::chunk_map::*; use crate::util::linear_scan::Region; +use crate::util::object_enum::BlockMayHaveObjects; use crate::vm::ObjectModel; use crate::{ util::{ @@ -48,6 +49,12 @@ impl Region for Block { } } +impl BlockMayHaveObjects for Block { + fn may_have_objects(&self) -> bool { + self.get_state() != BlockState::Unallocated + } +} + impl Block { /// Log pages in block pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize; diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs index 41773be244..30ebc63c0d 100644 --- a/src/policy/marksweepspace/native_ms/global.rs +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -10,6 +10,7 @@ use crate::{ copy::CopySemantics, heap::{BlockPageResource, PageResource}, metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec}, + object_enum::{self, ObjectEnumerator}, ObjectReference, }, vm::{ActivePlan, VMBinding}, @@ -246,6 +247,10 @@ impl Space for MarkSweepSpace { fn release_multiple_pages(&mut self, _start: crate::util::Address) { todo!() } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); + } } impl crate::policy::gc_work::PolicyTraceObject for MarkSweepSpace { diff --git a/src/policy/space.rs b/src/policy/space.rs index c5663dfa01..634dca6d96 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -5,6 +5,7 @@ use crate::util::conversions::*; use crate::util::metadata::side_metadata::{ SideMetadataContext, SideMetadataSanity, SideMetadataSpec, }; +use crate::util::object_enum::ObjectEnumerator; use crate::util::Address; use crate::util::ObjectReference; @@ -348,6 +349,18 @@ pub trait Space: 'static + SFT + Sync + Downcast { side_metadata_sanity_checker .verify_metadata_context(std::any::type_name::(), &self.common().metadata) } + + /// Enumerate ranges of addresses that may contain objects. Used for enumerating objects in the + /// space. 
Implementors should call `f` for each address range that may contain objects, and + /// may ignore address ranges that are guaranteed not to include objects. The address range + /// will then be linearly scanned. + /// + /// # Implementation consideration + /// + /// Because `Space` is a trait object type and `f` is a `dyn` reference, each invocation of `f` + /// involves a dynamic dispatching. The overhead is OK if we call it a block at a time, but it + /// will be too costly if we call a `dyn` callback for each object. + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator); } /// Print the VM map for a space. diff --git a/src/util/linear_scan.rs b/src/util/linear_scan.rs index bf391c785d..6b144ad5db 100644 --- a/src/util/linear_scan.rs +++ b/src/util/linear_scan.rs @@ -108,6 +108,10 @@ pub trait Region: Copy + PartialEq + PartialOrd { fn end(&self) -> Address { self.start() + Self::BYTES } + /// Return the address range of the region. + fn as_range(&self) -> std::ops::Range
{ + self.start()..self.end() + } /// Return the next region after this one. fn next(&self) -> Self { self.next_nth(1) diff --git a/src/util/mod.rs b/src/util/mod.rs index 8036ad7ae1..4175d1d208 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -50,6 +50,7 @@ pub(crate) mod erase_vm; pub(crate) mod finalizable_processor; /// Logger initialization pub(crate) mod logger; +pub(crate) mod object_enum; /// Forwarding word in object copying. pub(crate) mod object_forwarding; /// Reference processing implementation. diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs new file mode 100644 index 0000000000..c569222c1b --- /dev/null +++ b/src/util/object_enum.rs @@ -0,0 +1,99 @@ +//! Helper types for object enumeration + +use crate::vm::VMBinding; + +use super::{ + heap::{ + chunk_map::{ChunkMap, ChunkState}, + MonotonePageResource, + }, + linear_scan::Region, + Address, ObjectReference, +}; + +/// A trait for enumerating objects in spaces. +/// +/// This is a trait object type, so we avoid using generics. Because this trait may be used as a +/// `&mut dyn`, we avoid the cost of dynamic dispatching by allowing the user to supply an address +/// range instead of a single object reference. The implementor of this trait will use linear +/// scanning to find objects in the range in batch. But if the space is too sparse (e.g. LOS) and +/// the cost of linear scanning is greater than the dynamic dispatching, use `visit_object` +/// directly. +pub trait ObjectEnumerator { + /// Visit a single object. + fn visit_object(&mut self, object: ObjectReference); + /// Visit an address range that may contain objects. + fn visit_address_range(&mut self, addr_range: std::ops::Range
<Address>
); } /// An implementation of `ObjectEnumerator` that wraps a callback. +pub struct ClosureObjectEnumerator<F> +where + F: FnMut(ObjectReference), +{ + object_callback: F, +} + +impl<F> ClosureObjectEnumerator<F> +where + F: FnMut(ObjectReference), +{ + pub fn new(object_callback: F) -> Self { + Self { object_callback } + } +} + +impl<F> ObjectEnumerator for ClosureObjectEnumerator<F> +where + F: FnMut(ObjectReference), +{ + fn visit_object(&mut self, object: ObjectReference) { + (self.object_callback)(object); + } + + fn visit_address_range(&mut self, addr_range: std::ops::Range<Address>
) { + todo!() + } +} + +/// Allow querying if a block may have objects. `MarkSweepSpace` and `ImmixSpace` use different +/// `Block` types, and they have different block states. This trait lets both `Block` types provide +/// the same `may_have_objects` method. +pub(crate) trait BlockMayHaveObjects: Region { + /// Return `true` if the block may contain valid objects (objects with the VO bit set). Return + /// `false` otherwise. + /// + /// This function is used during object enumeration to filter out memory regions that do not + /// contain objects. Because object enumeration may happen at mutator time, and another mutators + /// may be allocating objects, this function only needs to reflect the state of the block at the + /// time of calling, and is allowed to conservatively return `true`. + fn may_have_objects(&self) -> bool; +} + +pub(crate) fn enumerate_blocks_from_chunk_map( + enumerator: &mut dyn ObjectEnumerator, + chunk_map: &ChunkMap, +) where + B: BlockMayHaveObjects, +{ + for chunk in chunk_map.all_chunks() { + if chunk_map.get(chunk) == ChunkState::Allocated { + for block in chunk.iter_region::() { + if block.may_have_objects() { + enumerator.visit_address_range(block.as_range()); + } + } + } + } +} + +pub(crate) fn enumerate_blocks_from_monotonic_page_resource( + enumerator: &mut dyn ObjectEnumerator, + pr: &MonotonePageResource, +) where + VM: VMBinding, +{ + for (start, size) in pr.iterate_allocated_regions() { + enumerator.visit_address_range(start..(start + size)); + } +} diff --git a/src/util/treadmill.rs b/src/util/treadmill.rs index e8cfe92d4f..cbc6aaf84a 100644 --- a/src/util/treadmill.rs +++ b/src/util/treadmill.rs @@ -4,6 +4,8 @@ use std::sync::Mutex; use crate::util::ObjectReference; +use super::object_enum::ObjectEnumerator; + pub struct TreadMill { from_space: Mutex>, to_space: Mutex>, @@ -99,6 +101,17 @@ impl TreadMill { trace!("Flipped from_space and to_space"); } } + + pub(crate) fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + let mut visit_objects = |set: &Mutex>| { + let set = set.lock().unwrap(); + for object in set.iter() { + enumerator.visit_object(*object); + } + }; + visit_objects(&self.alloc_nursery); + visit_objects(&self.to_space); + } } impl Default for TreadMill { From c69a68b884a6bb297bf9e1d9ccb95fb0257158c6 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 24 Jul 2024 22:19:22 +0800 Subject: [PATCH 02/28] WIP: linear scanning metadata --- src/mmtk.rs | 2 +- src/util/metadata/side_metadata/global.rs | 90 ++++++++++++++++++++++ src/util/metadata/side_metadata/helpers.rs | 53 +++++++++++++ src/util/object_enum.rs | 26 +++++-- 4 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 42f7699c63..5f835e9c0d 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -481,7 +481,7 @@ impl MMTK { { use crate::util::object_enum; - let mut enumerator = object_enum::ClosureObjectEnumerator::new(f); + let mut enumerator = object_enum::ClosureObjectEnumerator::<_, VM>::new(f); let plan = self.get_plan(); plan.for_each_space(&mut |space| { space.enumerate_objects(&mut enumerator); diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 123698aa6e..c87c0967a0 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -8,6 +8,7 @@ use crate::util::metadata::metadata_val_traits::*; use crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC; use crate::util::Address; use num_traits::FromPrimitive; +use 
std::cell::RefCell; use std::fmt; use std::io::Result; use std::sync::atomic::{AtomicU8, Ordering}; @@ -1211,6 +1212,95 @@ impl SideMetadataSpec { res.get() .map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } + + pub fn scan_non_zero_values( + &self, + data_start_addr: Address, + data_end_addr: Address, + visit_data: impl FnMut(Address), + ) { + if self.uses_contiguous_side_metadata() { + // Contiguous side metadata + self.scan_non_zero_values_fast::(data_start_addr, data_end_addr, visit_data); + } else { + // TODO: We should be able to optimize further for this case. However, we need to be careful that the side metadata + // is not contiguous, and we need to skip to the next chunk's side metadata when we search to a different chunk. + // This won't be used for VO bit, as VO bit is global and is always contiguous. So for now, I am not bothered to do it. + warn!("We are trying to search non zero bits in an discontiguous side metadata. The performance is slow, as MMTk does not optimize for this case."); + self.scan_non_zero_values_simple::(data_start_addr, data_end_addr, visit_data); + } + } + + pub fn scan_non_zero_values_simple( + &self, + data_start_addr: Address, + data_end_addr: Address, + mut visit_data: impl FnMut(Address), + ) { + let region_bytes = 1usize << self.log_bytes_in_region; + + let mut cursor = data_start_addr; + while cursor < data_end_addr { + debug_assert!(cursor.is_mapped()); + + // If we find non-zero value, just call back. + if !unsafe { self.load::(cursor).is_zero() } { + visit_data(cursor); + } + cursor += region_bytes; + } + } + + pub fn scan_non_zero_values_fast( + &self, + data_start_addr: Address, + data_end_addr: Address, + visit_data: impl FnMut(Address), + ) { + debug_assert!(self.uses_contiguous_side_metadata()); + + // Then figure out the start and end metadata address and bits. + let start_meta_addr = address_to_contiguous_meta_address(self, data_start_addr); + let start_meta_shift = meta_byte_lshift(self, data_start_addr); + let end_meta_addr = address_to_contiguous_meta_address(self, data_end_addr); + let end_meta_shift = meta_byte_lshift(self, data_end_addr); + + // FIXME: This is probably not going to perform well. We need to refactor + // `iterate_meta_bits` to change from two callbacks to one. 
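+ // `iterate_meta_bits` currently takes two `Fn` (not `FnMut`) callbacks, so they cannot
+ // capture `visit_data` mutably; the `RefCell` gives both closures shared access to it.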
+ let cell = RefCell::new(visit_data); + + let check_bytes_forwards = |start: Address, end: Address| -> bool { + helpers::scan_non_zero_bits_in_metadata_bytes(start, end, &mut |addr, bit| { + (cell.borrow_mut())(helpers::contiguous_meta_address_to_address( + self, addr, bit as u8, + )); + }); + false + }; + let check_bits_forwards = |addr: Address, start_bit: u8, end_bit: u8| -> bool { + helpers::scan_non_zero_bits_in_metadata_bits( + addr, + start_bit as u32, + end_bit as u32, + &mut |addr, bit| { + (cell.borrow_mut())(helpers::contiguous_meta_address_to_address( + self, addr, bit as u8, + )); + }, + ); + false + }; + + Self::iterate_meta_bits( + start_meta_addr, + start_meta_shift, + end_meta_addr, + end_meta_shift, + false, + &check_bytes_forwards, + &check_bits_forwards, + ); + } } impl fmt::Debug for SideMetadataSpec { diff --git a/src/util/metadata/side_metadata/helpers.rs b/src/util/metadata/side_metadata/helpers.rs index b9bf197dfb..7a7009f538 100644 --- a/src/util/metadata/side_metadata/helpers.rs +++ b/src/util/metadata/side_metadata/helpers.rs @@ -260,6 +260,59 @@ fn find_last_non_zero_bit_in_u8(byte_value: u8) -> Option { } } +pub fn scan_non_zero_bits_in_metadata_bytes( + meta_start: Address, + meta_end: Address, + visit_bit: &mut impl FnMut(Address, u32), +) { + use crate::util::constants::BYTES_IN_ADDRESS; + + let mut cursor = meta_start; + while cursor < meta_end && !cursor.is_aligned_to(BYTES_IN_ADDRESS) { + let byte = unsafe { cursor.load::() }; + scan_non_zero_bits_in_metadata_word(cursor, byte as usize, visit_bit); + cursor += 1usize; + } + + while cursor + BYTES_IN_ADDRESS < meta_end { + let word = unsafe { cursor.load::() }; + scan_non_zero_bits_in_metadata_word(cursor, word, visit_bit); + cursor += BYTES_IN_ADDRESS; + } + + while cursor < meta_end { + let byte = unsafe { cursor.load::() }; + scan_non_zero_bits_in_metadata_word(cursor, byte as usize, visit_bit); + cursor += 1usize; + } +} + +pub fn scan_non_zero_bits_in_metadata_word( + meta_addr: Address, + mut word: usize, + visit_bit: &mut impl FnMut(Address, u32), +) { + while word != 0 { + let bit = word.trailing_zeros(); + visit_bit(meta_addr, bit); + word = word & (word - 1); + } +} + +pub fn scan_non_zero_bits_in_metadata_bits( + meta_addr: Address, + bit_start: u32, + bit_end: u32, + visit_bit: &mut impl FnMut(Address, u32), +) { + let byte = unsafe { meta_addr.load::() }; + for bit in bit_start..bit_end { + if byte & (1 << bit) != 0 { + visit_bit(meta_addr, bit); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs index c569222c1b..ba2da6a323 100644 --- a/src/util/object_enum.rs +++ b/src/util/object_enum.rs @@ -1,14 +1,14 @@ //! Helper types for object enumeration +use std::marker::PhantomData; + use crate::vm::VMBinding; use super::{ heap::{ chunk_map::{ChunkMap, ChunkState}, MonotonePageResource, - }, - linear_scan::Region, - Address, ObjectReference, + }, linear_scan::Region, metadata::side_metadata::spec_defs::VO_BIT, Address, ObjectReference }; /// A trait for enumerating objects in spaces. @@ -27,32 +27,42 @@ pub trait ObjectEnumerator { } /// An implementation of `ObjectEnumerator` that wraps a callback. 
-pub struct ClosureObjectEnumerator<F> +pub struct ClosureObjectEnumerator<F, VM> where F: FnMut(ObjectReference), + VM: VMBinding, { object_callback: F, + phantom_data: PhantomData<VM>, } -impl<F> ClosureObjectEnumerator<F> +impl<F, VM> ClosureObjectEnumerator<F, VM> where F: FnMut(ObjectReference), + VM: VMBinding, { pub fn new(object_callback: F) -> Self { - Self { object_callback } + Self { + object_callback, + phantom_data: PhantomData, + } } } -impl<F> ObjectEnumerator for ClosureObjectEnumerator<F> +impl<F, VM> ObjectEnumerator for ClosureObjectEnumerator<F, VM> where F: FnMut(ObjectReference), + VM: VMBinding, { fn visit_object(&mut self, object: ObjectReference) { (self.object_callback)(object); } fn visit_address_range(&mut self, addr_range: std::ops::Range<Address>
) { - todo!() + VO_BIT.scan_non_zero_values::(addr_range.start, addr_range.end, |address| { + let object = ObjectReference::from_address::(address); + (self.object_callback)(object); + }) } } From a83798704ce332c967b9abce946d0b9d76cc3d6d Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 29 Jul 2024 21:34:32 +0800 Subject: [PATCH 03/28] Refactor iterate_meta_bits --- src/util/metadata/side_metadata/global.rs | 348 +++++++++++----------- src/util/metadata/side_metadata/mod.rs | 1 + src/util/metadata/side_metadata/ranges.rs | 51 ++++ 3 files changed, 232 insertions(+), 168 deletions(-) create mode 100644 src/util/metadata/side_metadata/ranges.rs diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 39df588b41..9250b1da06 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -8,6 +8,7 @@ use crate::util::metadata::metadata_val_traits::*; use crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC; use crate::util::Address; use num_traits::FromPrimitive; +use ranges::BitByteRange; use std::fmt; use std::io::Result; use std::sync::atomic::{AtomicU8, Ordering}; @@ -154,26 +155,33 @@ impl SideMetadataSpec { MMAPPER.is_mapped_address(meta_addr) } - /// This method is used for iterating side metadata for a data address range. As we cannot guarantee - /// that the data address range can be mapped to whole metadata bytes, we have to deal with cases that - /// we need to mask and zero certain bits in a metadata byte. The end address and the end bit are exclusive. - /// The end bit for update_bits could be 8, so overflowing needs to be taken care of. + /// This method is used for iterating side metadata for a data address range. As we cannot + /// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal + /// with visiting only a bit range in a metadata byte. /// - /// Returns true if we iterate through every bits in the range. Return false if we abort iteration early. + /// The metadata bit range starts at the bit at index `meta_start_bit` in the byte at address + /// `meta_start_addr`, and ends at (but does not include) the bit at index `meta_end_bit` in the + /// byte at address `meta_end_addr`. /// /// Arguments: - /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). Otherwise, - /// we iterate backwards (from end/high address to start/low address). - /// * `visit_bytes`/`visit_bits`: The closures returns whether the itertion is early terminated. - pub(super) fn iterate_meta_bits( + /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). + /// Otherwise, we iterate backwards (from end/high address to start/low address). + /// * `visitor`: The callback that visits ranges of bits or bytes. It returns whether the + /// itertion is early terminated. + /// + /// Returns true if we iterate through every bits in the range. Return false if we abort + /// iteration early. 
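+ /// A sketch of a call, using a hypothetical visitor that merely counts sub-ranges:
+ /// ```ignore
+ /// let mut ranges = 0;
+ /// Self::iterate_meta_bits(start_addr, 0, end_addr, 4, true, &mut |_range| {
+ ///     ranges += 1;
+ ///     false // return false to continue the iteration
+ /// });
+ /// ```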
+ pub(super) fn iterate_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, forwards: bool, - visit_bytes: &impl Fn(Address, Address) -> bool, - visit_bits: &impl Fn(Address, u8, u8) -> bool, - ) -> bool { + visitor: &mut V, + ) -> bool + where + V: FnMut(BitByteRange) -> bool, + { trace!( "iterate_meta_bits: {} {}, {} {}", meta_start_addr, @@ -186,97 +194,68 @@ impl SideMetadataSpec { return false; } - // zeroing bytes + // visit whole bytes if meta_start_bit == 0 && meta_end_bit == 0 { - return visit_bytes(meta_start_addr, meta_end_addr); + return visitor(BitByteRange::Bytes { + start: meta_start_addr, + end: meta_end_addr, + }); } if meta_start_addr == meta_end_addr { - // Update bits in the same byte between start and end bit - visit_bits(meta_start_addr, meta_start_bit, meta_end_bit) + // Visit bits in the same byte between start and end bit + return visitor(BitByteRange::BitsInByte { + addr: meta_start_addr, + bit_start: meta_start_bit as usize, + bit_end: meta_end_bit as usize, + }); } else if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { - // Update bits in the same byte after the start bit (between start bit and 8) - visit_bits(meta_start_addr, meta_start_bit, 8) + // Visit bits in the same byte after the start bit (between start bit and 8) + return visitor(BitByteRange::BitsInByte { + addr: meta_start_addr, + bit_start: meta_start_bit as usize, + bit_end: 8usize, + }); } else { + // We cannot let multiple closures capture `visitor` mutably at the same time, so we + // pass the visitor in as `v` every time. + + // update bits in the first byte + let visit_start = |v: &mut V| { + v(BitByteRange::BitsInByte { + addr: meta_start_addr, + bit_start: meta_start_bit as usize, + bit_end: 8usize, + }) + }; + + // update bytes in the middle + let visit_middle = |v: &mut V| { + let start = meta_start_addr + 1usize; + let end = meta_end_addr; + if start < end { + // non-empty middle range + v(BitByteRange::Bytes { start, end }) + } else { + // empty middle range + false + } + }; + + // update bits in the last byte + let visit_end = |v: &mut V| { + v(BitByteRange::BitsInByte { + addr: meta_end_addr, + bit_start: 0 as usize, + bit_end: meta_end_bit as usize, + }) + }; + // Update each segments. - // Clippy wants to move this if block up as a else-if block. But I think this is logically more clear. So disable the clippy warning. 
- #[allow(clippy::collapsible_else_if)] if forwards { - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - false + visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) } else { - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - false + visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) } } } @@ -288,16 +267,25 @@ impl SideMetadataSpec { meta_end_addr: Address, meta_end_bit: u8, ) { - let zero_bytes = |start: Address, end: Address| -> bool { - memory::zero(start, end - start); - false - }; - let zero_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are zeroing selected bits in one byte - let mask: u8 = - u8::MAX.checked_shl(end_bit.into()).unwrap_or(0) | !(u8::MAX << start_bit); // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. - unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::zero(start, end - start); + false + } + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are zeroing selected bit in one byte + // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. + let mask: u8 = + u8::MAX.checked_shl(bit_end as u32).unwrap_or(0) | !(u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); + false + } + } }; Self::iterate_meta_bits( meta_start_addr, @@ -305,8 +293,7 @@ impl SideMetadataSpec { meta_end_addr, meta_end_bit, true, - &zero_bytes, - &zero_bits, + &mut visitor, ); } @@ -317,16 +304,25 @@ impl SideMetadataSpec { meta_end_addr: Address, meta_end_bit: u8, ) { - let set_bytes = |start: Address, end: Address| -> bool { - memory::set(start, 0xff, end - start); - false - }; - let set_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. - unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::set(start, 0xff, end - start); + false + } + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are setting selected bits in one byte + // Get a mask that the bits we need to set are 1, and the other bits are 0. 
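+ // e.g. bit_start = 2 and bit_end = 5 yield mask 0b0001_1100, covering bits 2..5.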
+ let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); + false + } + } }; Self::iterate_meta_bits( meta_start_addr, @@ -334,8 +330,7 @@ impl SideMetadataSpec { meta_end_addr, meta_end_bit, true, - &set_bytes, - &set_bits, + &mut visitor, ); } @@ -498,37 +493,44 @@ impl SideMetadataSpec { debug_assert_eq!(dst_meta_start_bit, src_meta_start_bit); - let copy_bytes = |dst_start: Address, dst_end: Address| -> bool { - unsafe { - let byte_offset = dst_start - dst_meta_start_addr; - let src_start = src_meta_start_addr + byte_offset; - let size = dst_end - dst_start; - std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { + start: dst_start, + end: dst_end, + } => unsafe { + let byte_offset = dst_start - dst_meta_start_addr; + let src_start = src_meta_start_addr + byte_offset; + let size = dst_end - dst_start; + std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); + false + }, + BitByteRange::BitsInByte { + addr: dst, + bit_start, + bit_end, + } => { + let byte_offset = dst - dst_meta_start_addr; + let src = src_meta_start_addr + byte_offset; + // we are setting selected bits in one byte + let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); // Get a mask that the bits we need to set are 1, and the other bits are 0. + let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); + let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); + let new = (old_src & mask) | (old_dst & !mask); + unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); + false + } } }; - let copy_bits = |dst: Address, start_bit: u8, end_bit: u8| -> bool { - let byte_offset = dst - dst_meta_start_addr; - let src = src_meta_start_addr + byte_offset; - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. - let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); - let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); - let new = (old_src & mask) | (old_dst & !mask); - unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); - false - }; - Self::iterate_meta_bits( dst_meta_start_addr, dst_meta_start_bit, dst_meta_end_addr, dst_meta_end_bit, true, - ©_bytes, - ©_bits, + &mut visitor, ); } @@ -1169,32 +1171,44 @@ impl SideMetadataSpec { // The result will be set by one of the following closures. // Use Cell so it doesn't need to be mutably borrowed by the two closures which Rust will complain. - let res = std::cell::Cell::new(None); - - let check_bytes_backwards = |start: Address, end: Address| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. - true + let mut res = None; + + let mut visitor = |range: BitByteRange| { + match range { + BitByteRange::Bytes { start, end } => { + match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. 
+ true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. - helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, - } - }; - let check_bits_backwards = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bits(addr, start_bit, end_bit) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. - true + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + match helpers::find_last_non_zero_bit_in_metadata_bits( + addr, + bit_start as u8, + bit_end as u8, + ) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. - helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, } }; @@ -1204,12 +1218,10 @@ impl SideMetadataSpec { end_meta_addr, end_meta_shift, false, - &check_bytes_backwards, - &check_bits_backwards, + &mut visitor, ); - res.get() - .map(|addr| addr.align_down(1 << self.log_bytes_in_region)) + res.map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } } diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs index 406f91167d..b8a1c65d9c 100644 --- a/src/util/metadata/side_metadata/mod.rs +++ b/src/util/metadata/side_metadata/mod.rs @@ -9,6 +9,7 @@ mod helpers_32; mod global; mod sanity; mod side_metadata_tests; +pub(crate) mod ranges; pub(crate) mod spec_defs; pub use constants::*; diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs new file mode 100644 index 0000000000..84c9f051f0 --- /dev/null +++ b/src/util/metadata/side_metadata/ranges.rs @@ -0,0 +1,51 @@ +//! Data types for visiting metadata ranges at different granularities + +use crate::util::Address; + +/// The type for bit offset in a byte, word or a SIMD vector. +/// +/// We use usize because it is generic and we may use AVX-512 some day, where u8 (256 max) is not +/// big enough. +pub type BitOffset = usize; + +/// A range of bytes or bits within a byte. It is the unit of visiting a contiguous bit range of a +/// side metadata. +/// +/// In general, a bit range of a bitmap starts with multiple bits in the byte, followed by many +/// whole bytes, and ends with multiple bits in the last byte. +/// +/// A range is never empty. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BitByteRange { + /// A range of whole bytes. + Bytes { + /// The starting address (inclusive) of the bytes. + start: Address, + /// The ending address (exclusive) of the bytes. + end: Address, + }, + /// A range of bits within a byte. + BitsInByte { + /// The address of the byte. 
+ addr: Address, + /// The starting bit index (inclusive), starting with zero from the low-order bit. + bit_start: BitOffset, + /// The ending bit index (exclusive), starting with zero from the low-order bit. This may + /// be 8 which means the range includes the highest bit. Be careful when shifting a `u8` + /// value because shifting an `u8` by 8 is considered an overflow in Rust. + bit_end: BitOffset, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ByteWordRange { + Words { + start: Address, + end: Address, + }, + BytesInWord { + start: Address, + end: Address, + }, +} + From 7d3f79d4e50dacec881252562c8c7946e2513e55 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 30 Jul 2024 10:10:09 +0800 Subject: [PATCH 04/28] Fix warning --- src/mmtk.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 04ed9959c4..d3d7f353f7 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -6,6 +6,8 @@ use crate::plan::Plan; use crate::policy::sft_map::{create_sft_map, SFTMap}; use crate::scheduler::GCWorkScheduler; +#[cfg(feature = "vo_bit")] +use crate::util::address::ObjectReference; #[cfg(feature = "analysis")] use crate::util::analysis::AnalysisManager; use crate::util::finalizable_processor::FinalizableProcessor; @@ -13,6 +15,7 @@ use crate::util::heap::gc_trigger::GCTrigger; use crate::util::heap::layout::vm_layout::VMLayout; use crate::util::heap::layout::{self, Mmapper, VMMap}; use crate::util::heap::HeapMeta; +use crate::util::opaque_pointer::*; use crate::util::options::Options; use crate::util::reference_processor::ReferenceProcessors; #[cfg(feature = "sanity")] @@ -20,7 +23,6 @@ use crate::util::sanity::sanity_checker::SanityChecker; #[cfg(feature = "extreme_assertions")] use crate::util::slot_logger::SlotLogger; use crate::util::statistics::stats::Stats; -use crate::util::{opaque_pointer::*, ObjectReference}; use crate::vm::ReferenceGlue; use crate::vm::VMBinding; use std::cell::UnsafeCell; From acaf942e40ec202cfd1c9fab810f4403f13100cb Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 31 Jul 2024 14:45:59 +0800 Subject: [PATCH 05/28] Benchmarks --- Cargo.toml | 6 ++- benches/bulk_meta/bzero_bset.rs | 62 +++++++++++++++++++++++ benches/bulk_meta/mod.rs | 1 + benches/main.rs | 22 +++++--- src/util/metadata/side_metadata/global.rs | 22 ++++++++ 5 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 benches/bulk_meta/bzero_bset.rs create mode 100644 benches/bulk_meta/mod.rs diff --git a/Cargo.toml b/Cargo.toml index b089819007..d039991ebd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ sysinfo = "0.30.9" [dev-dependencies] paste = "1.0.8" rand = "0.8.5" -criterion = "0.4" +criterion = "0.5.1" [build-dependencies] built = { version = "0.7.1", features = ["git2"] } @@ -75,6 +75,10 @@ perf_counter = ["pfm"] # CI scripts run those tests with this feature. mock_test = [] +# This feature is only used for benchmarks. +# It will expose some private functions for benchmarking. +bench = [] + # .github/scripts/ci-common.sh extracts features from the following part (including from comments). # So be careful when editing or adding stuff to the section below. diff --git a/benches/bulk_meta/bzero_bset.rs b/benches/bulk_meta/bzero_bset.rs new file mode 100644 index 0000000000..5d28b25daf --- /dev/null +++ b/benches/bulk_meta/bzero_bset.rs @@ -0,0 +1,62 @@ +//! Benchmarks for bulk zeroing and setting. 
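+//! Run with `cargo bench --features bench`; the `bench` feature exposes the private
+//! `set_meta_bits`/`zero_meta_bits` routines through the `bench_*` wrappers used below.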
+ +use std::os::raw::c_void; + +use criterion::Criterion; +use mmtk::util::{constants::LOG_BITS_IN_WORD, metadata::side_metadata::SideMetadataSpec, Address}; + +fn allocate_aligned(size: usize) -> Address { + let ptr = unsafe { + std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap()) + }; + Address::from_mut_ptr(ptr) +} + +const LINE_BYTES: usize = 256usize; // Match an Immix line size. +const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size. + +// Asssume one-bit-per-word metadata (matching VO bits). +const LINE_META_BYTES: usize = LINE_BYTES >> LOG_BITS_IN_WORD; +const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD; + +pub fn bench(c: &mut Criterion) { + c.bench_function("bzero_bset_line", |b| { + let start = allocate_aligned(LINE_META_BYTES); + let end = start + LINE_META_BYTES; + + b.iter(|| { + SideMetadataSpec::bench_set_meta_bits(start, 0, end, 0); + SideMetadataSpec::bench_zero_meta_bits(start, 0, end, 0); + }) + }); + + c.bench_function("bzero_bset_line_memset", |b| { + let start = allocate_aligned(LINE_META_BYTES); + let end = start + LINE_META_BYTES; + + b.iter(|| unsafe { + libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start); + libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start); + }) + }); + + c.bench_function("bzero_bset_block", |b| { + let start = allocate_aligned(BLOCK_META_BYTES); + let end = start + BLOCK_META_BYTES; + + b.iter(|| { + SideMetadataSpec::bench_set_meta_bits(start, 0, end, 0); + SideMetadataSpec::bench_zero_meta_bits(start, 0, end, 0); + }) + }); + + c.bench_function("bzero_bset_block_memset", |b| { + let start = allocate_aligned(BLOCK_META_BYTES); + let end = start + BLOCK_META_BYTES; + + b.iter(|| unsafe { + libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start); + libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start); + }) + }); +} diff --git a/benches/bulk_meta/mod.rs b/benches/bulk_meta/mod.rs new file mode 100644 index 0000000000..1b052d613f --- /dev/null +++ b/benches/bulk_meta/mod.rs @@ -0,0 +1 @@ +pub mod bzero_bset; diff --git a/benches/main.rs b/benches/main.rs index 6c735ce4e2..b982b2d322 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -19,11 +19,14 @@ use criterion::Criterion; // However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete // them. +#[cfg(feature = "bench")] +mod bulk_meta; + #[cfg(feature = "mock_test")] mod mock_bench; -pub fn bench_main(_c: &mut Criterion) { - #[cfg(feature = "mock_test")] +#[cfg(feature = "mock_test")] +pub fn bench_mock(_c: &mut Criterion) { match std::env::var("MMTK_BENCH") { Ok(bench) => match bench.as_str() { "alloc" => mock_bench::alloc::bench(_c), @@ -33,12 +36,17 @@ pub fn bench_main(_c: &mut Criterion) { }, Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"), } +} - #[cfg(not(feature = "mock_test"))] - { - eprintln!("ERROR: Currently there are no benchmarks when the \"mock_test\" feature is not enabled."); - std::process::exit(1); - } +pub fn bench_main(_c: &mut Criterion) { + // If the "mock_test" feature is enabled, we only run mock test. + #[cfg(feature = "mock_test")] + return bench_mock(_c); + + // Some benchmarks rely on the "bench" feature to expose some private functions. + // Run them with `cargo bench --features bench`. 
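+ // Criterion also accepts a name filter; e.g. `cargo bench --features bench -- bzero_bset_line`
+ // runs only the benchmarks whose names match the filter.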
+ #[cfg(feature = "bench")] + bulk_meta::bzero_bset::bench(_c); } criterion_group!(benches, bench_main); diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 9250b1da06..48e6616275 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -297,6 +297,17 @@ impl SideMetadataSpec { ); } + /// Expose `zero_meta_bits` when running `cargo bench`. + #[cfg(feature = "bench")] + pub fn bench_zero_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, + ) { + Self::zero_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) + } + /// This method is used for bulk setting side metadata for a data address range. pub(super) fn set_meta_bits( meta_start_addr: Address, @@ -334,6 +345,17 @@ impl SideMetadataSpec { ); } + /// Expose `set_meta_bits` when running `cargo bench`. + #[cfg(feature = "bench")] + pub fn bench_set_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, + ) { + Self::set_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) + } + /// This method does bulk update for the given data range. It calculates the metadata bits for the given data range, /// and invoke the given method to update the metadata bits. pub(super) fn bulk_update_metadata( From f4a08b3181173f22ad4111fd3d86e41af681cb77 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 1 Aug 2024 16:41:31 +0800 Subject: [PATCH 06/28] Remove ByteWordRange Let's add it back when implementing VO bit scanning. --- src/util/metadata/side_metadata/ranges.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs index 84c9f051f0..cb970ab352 100644 --- a/src/util/metadata/side_metadata/ranges.rs +++ b/src/util/metadata/side_metadata/ranges.rs @@ -36,16 +36,3 @@ pub enum BitByteRange { bit_end: BitOffset, }, } - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ByteWordRange { - Words { - start: Address, - end: Address, - }, - BytesInWord { - start: Address, - end: Address, - }, -} - From a51cdc4c5823370a1afdbcdc7d33f60a4a92a7b9 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 1 Aug 2024 16:54:48 +0800 Subject: [PATCH 07/28] Fix clippy warnings --- src/util/metadata/side_metadata/global.rs | 90 ++++++++++++----------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 48e6616275..4f3ebd8745 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -189,12 +189,13 @@ impl SideMetadataSpec { meta_end_addr, meta_end_bit ); - // Start/end is the same, we don't need to do anything. + + // The start and the end are the same, we don't need to do anything. if meta_start_addr == meta_end_addr && meta_start_bit == meta_end_bit { return false; } - // visit whole bytes + // If the range is already byte-aligned, visit whole bits. if meta_start_bit == 0 && meta_end_bit == 0 { return visitor(BitByteRange::Bytes { start: meta_start_addr, @@ -202,61 +203,68 @@ impl SideMetadataSpec { }); } + // If the start and the end are within the same byte, + // visit the bit range within the byte. 
if meta_start_addr == meta_end_addr { - // Visit bits in the same byte between start and end bit return visitor(BitByteRange::BitsInByte { addr: meta_start_addr, bit_start: meta_start_bit as usize, bit_end: meta_end_bit as usize, }); - } else if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { - // Visit bits in the same byte after the start bit (between start bit and 8) + } + + // If the end is the 0th bit of the next byte of the start, + // visit the bit range from the start to the end (bit 8) of the same byte. + if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { return visitor(BitByteRange::BitsInByte { addr: meta_start_addr, bit_start: meta_start_bit as usize, bit_end: 8usize, }); - } else { - // We cannot let multiple closures capture `visitor` mutably at the same time, so we - // pass the visitor in as `v` every time. - - // update bits in the first byte - let visit_start = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: meta_start_addr, - bit_start: meta_start_bit as usize, - bit_end: 8usize, - }) - }; + } - // update bytes in the middle - let visit_middle = |v: &mut V| { - let start = meta_start_addr + 1usize; - let end = meta_end_addr; - if start < end { - // non-empty middle range - v(BitByteRange::Bytes { start, end }) - } else { - // empty middle range - false - } - }; + // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start + // or the end. Try to break it into (at most) three sub-ranges. - // update bits in the last byte - let visit_end = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: meta_end_addr, - bit_start: 0 as usize, - bit_end: meta_end_bit as usize, - }) - }; + // We cannot let multiple closures capture `visitor` mutably at the same time, so we + // pass the visitor in as `v` every time. - // Update each segments. - if forwards { - visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) + // update bits in the first byte + let visit_start = |v: &mut V| { + v(BitByteRange::BitsInByte { + addr: meta_start_addr, + bit_start: meta_start_bit as usize, + bit_end: 8usize, + }) + }; + + // update bytes in the middle + let visit_middle = |v: &mut V| { + let start = meta_start_addr + 1usize; + let end = meta_end_addr; + if start < end { + // non-empty middle range + v(BitByteRange::Bytes { start, end }) } else { - visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) + // empty middle range + false } + }; + + // update bits in the last byte + let visit_end = |v: &mut V| { + v(BitByteRange::BitsInByte { + addr: meta_end_addr, + bit_start: 0usize, + bit_end: meta_end_bit as usize, + }) + }; + + // Update each segments. 
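+ // Note that `||` short-circuits: as soon as one segment's visitor returns true (abort),
+ // the remaining segments are skipped and true is returned to the caller.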
+ if forwards { + visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) + } else { + visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) } } From b1486e3a9e0d7b7641170b60cf2a47b2714a3695 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 1 Aug 2024 16:55:07 +0800 Subject: [PATCH 08/28] Formatting --- src/util/metadata/side_metadata/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs index b8a1c65d9c..612c69223f 100644 --- a/src/util/metadata/side_metadata/mod.rs +++ b/src/util/metadata/side_metadata/mod.rs @@ -7,9 +7,9 @@ mod helpers; mod helpers_32; mod global; +pub(crate) mod ranges; mod sanity; mod side_metadata_tests; -pub(crate) mod ranges; pub(crate) mod spec_defs; pub use constants::*; From ab28a7913c293a68843a7634d16d9415dff4ba4e Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 1 Aug 2024 18:10:16 +0800 Subject: [PATCH 09/28] Move iterate_meta_bits to ranges::break_bit_range --- src/util/metadata/side_metadata/global.rs | 121 +--------------------- src/util/metadata/side_metadata/ranges.rs | 115 ++++++++++++++++++++ 2 files changed, 119 insertions(+), 117 deletions(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 4f3ebd8745..fd701b92c2 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -155,119 +155,6 @@ impl SideMetadataSpec { MMAPPER.is_mapped_address(meta_addr) } - /// This method is used for iterating side metadata for a data address range. As we cannot - /// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal - /// with visiting only a bit range in a metadata byte. - /// - /// The metadata bit range starts at the bit at index `meta_start_bit` in the byte at address - /// `meta_start_addr`, and ends at (but does not include) the bit at index `meta_end_bit` in the - /// byte at address `meta_end_addr`. - /// - /// Arguments: - /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). - /// Otherwise, we iterate backwards (from end/high address to start/low address). - /// * `visitor`: The callback that visits ranges of bits or bytes. It returns whether the - /// itertion is early terminated. - /// - /// Returns true if we iterate through every bits in the range. Return false if we abort - /// iteration early. - pub(super) fn iterate_meta_bits( - meta_start_addr: Address, - meta_start_bit: u8, - meta_end_addr: Address, - meta_end_bit: u8, - forwards: bool, - visitor: &mut V, - ) -> bool - where - V: FnMut(BitByteRange) -> bool, - { - trace!( - "iterate_meta_bits: {} {}, {} {}", - meta_start_addr, - meta_start_bit, - meta_end_addr, - meta_end_bit - ); - - // The start and the end are the same, we don't need to do anything. - if meta_start_addr == meta_end_addr && meta_start_bit == meta_end_bit { - return false; - } - - // If the range is already byte-aligned, visit whole bits. - if meta_start_bit == 0 && meta_end_bit == 0 { - return visitor(BitByteRange::Bytes { - start: meta_start_addr, - end: meta_end_addr, - }); - } - - // If the start and the end are within the same byte, - // visit the bit range within the byte. 
- if meta_start_addr == meta_end_addr { - return visitor(BitByteRange::BitsInByte { - addr: meta_start_addr, - bit_start: meta_start_bit as usize, - bit_end: meta_end_bit as usize, - }); - } - - // If the end is the 0th bit of the next byte of the start, - // visit the bit range from the start to the end (bit 8) of the same byte. - if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { - return visitor(BitByteRange::BitsInByte { - addr: meta_start_addr, - bit_start: meta_start_bit as usize, - bit_end: 8usize, - }); - } - - // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start - // or the end. Try to break it into (at most) three sub-ranges. - - // We cannot let multiple closures capture `visitor` mutably at the same time, so we - // pass the visitor in as `v` every time. - - // update bits in the first byte - let visit_start = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: meta_start_addr, - bit_start: meta_start_bit as usize, - bit_end: 8usize, - }) - }; - - // update bytes in the middle - let visit_middle = |v: &mut V| { - let start = meta_start_addr + 1usize; - let end = meta_end_addr; - if start < end { - // non-empty middle range - v(BitByteRange::Bytes { start, end }) - } else { - // empty middle range - false - } - }; - - // update bits in the last byte - let visit_end = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: meta_end_addr, - bit_start: 0usize, - bit_end: meta_end_bit as usize, - }) - }; - - // Update each segments. - if forwards { - visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) - } else { - visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) - } - } - /// This method is used for bulk zeroing side metadata for a data address range. pub(super) fn zero_meta_bits( meta_start_addr: Address, @@ -295,7 +182,7 @@ impl SideMetadataSpec { } } }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, @@ -343,7 +230,7 @@ impl SideMetadataSpec { } } }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, @@ -554,7 +441,7 @@ impl SideMetadataSpec { } }; - Self::iterate_meta_bits( + ranges::break_bit_range( dst_meta_start_addr, dst_meta_start_bit, dst_meta_end_addr, @@ -1242,7 +1129,7 @@ impl SideMetadataSpec { } }; - Self::iterate_meta_bits( + ranges::break_bit_range( start_meta_addr, start_meta_shift, end_meta_addr, diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs index cb970ab352..9b4baa8f9a 100644 --- a/src/util/metadata/side_metadata/ranges.rs +++ b/src/util/metadata/side_metadata/ranges.rs @@ -36,3 +36,118 @@ pub enum BitByteRange { bit_end: BitOffset, }, } + +/// Break a bit range into sub-ranges of whole bytes and in-byte bits. +/// +/// This method is primarily used for iterating side metadata for a data address range. As we cannot +/// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal +/// with visiting only a bit range in a metadata byte. +/// +/// The bit range starts at the bit at index `meta_start_bit` in the byte at address +/// `meta_start_addr`, and ends at (but does not include) the bit at index `meta_end_bit` in the +/// byte at address `meta_end_addr`. +/// +/// Arguments: +/// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). +/// Otherwise, we iterate backwards (from end/high address to start/low address). 
+/// * `visitor`: The callback that visits ranges of bits or bytes. It returns true if the
+/// iteration should be terminated early.
+///
+/// Returns true if the visitor terminated the iteration early. Returns false if we iterated
+/// through every bit in the range.
+pub fn break_bit_range<V>(
+    start_addr: Address,
+    start_bit: u8,
+    end_addr: Address,
+    end_bit: u8,
+    forwards: bool,
+    visitor: &mut V,
+) -> bool
+where
+    V: FnMut(BitByteRange) -> bool,
+{
+    trace!(
+        "iterate_meta_bits: {} {}, {} {}",
+        start_addr,
+        start_bit,
+        end_addr,
+        end_bit
+    );
+
+    // The start and the end are the same, we don't need to do anything.
+    if start_addr == end_addr && start_bit == end_bit {
+        return false;
+    }
+
+    // If the range is already byte-aligned, visit whole bits.
+    if start_bit == 0 && end_bit == 0 {
+        return visitor(BitByteRange::Bytes {
+            start: start_addr,
+            end: end_addr,
+        });
+    }
+
+    // If the start and the end are within the same byte,
+    // visit the bit range within the byte.
+    if start_addr == end_addr {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit as usize,
+            bit_end: end_bit as usize,
+        });
+    }
+
+    // If the end is the 0th bit of the next byte of the start,
+    // visit the bit range from the start to the end (bit 8) of the same byte.
+    if start_addr + 1usize == end_addr && end_bit == 0 {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit as usize,
+            bit_end: 8usize,
+        });
+    }
+
+    // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start
+    // or the end. Try to break it into (at most) three sub-ranges.
+
+    // We cannot let multiple closures capture `visitor` mutably at the same time, so we
+    // pass the visitor in as `v` every time.
+
+    // update bits in the first byte
+    let visit_start = |v: &mut V| {
+        v(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit as usize,
+            bit_end: 8usize,
+        })
+    };
+
+    // update bytes in the middle
+    let visit_middle = |v: &mut V| {
+        let start = start_addr + 1usize;
+        let end = end_addr;
+        if start < end {
+            // non-empty middle range
+            v(BitByteRange::Bytes { start, end })
+        } else {
+            // empty middle range
+            false
+        }
+    };
+
+    // update bits in the last byte
+    let visit_end = |v: &mut V| {
+        v(BitByteRange::BitsInByte {
+            addr: end_addr,
+            bit_start: 0usize,
+            bit_end: end_bit as usize,
+        })
+    };
+
+    // Update each segments.
+    if forwards {
+        visit_start(visitor) || visit_middle(visitor) || visit_end(visitor)
+    } else {
+        visit_end(visitor) || visit_middle(visitor) || visit_start(visitor)
+    }
+}

From e2bafd5a9e8827f5e03e77eacbf948e6038fcb3d Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 1 Aug 2024 18:25:41 +0800
Subject: [PATCH 10/28] Add tests and fix bugs

---
 src/util/metadata/side_metadata/ranges.rs | 241 +++++++++++++++++++++-
 1 file changed, 230 insertions(+), 11 deletions(-)

diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs
index 9b4baa8f9a..9c019da9bf 100644
--- a/src/util/metadata/side_metadata/ranges.rs
+++ b/src/util/metadata/side_metadata/ranges.rs
@@ -110,21 +110,34 @@ where
     // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start
     // or the end. Try to break it into (at most) three sub-ranges.
 
+    let start_aligned = start_bit == 0;
+    let end_aligned = end_bit == 0;
+
     // We cannot let multiple closures capture `visitor` mutably at the same time, so we
     // pass the visitor in as `v` every time.
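 
     // For example, a range from bit 3 of byte 0 to bit 5 of byte 9 breaks into
     // `BitsInByte { addr: 0, bit_start: 3, bit_end: 8 }`, `Bytes { start: 1, end: 9 }` and
     // `BitsInByte { addr: 9, bit_start: 0, bit_end: 5 }`, visited in that order when iterating
     // forwards.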
// update bits in the first byte let visit_start = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: start_addr, - bit_start: start_bit as usize, - bit_end: 8usize, - }) + if !start_aligned { + v(BitByteRange::BitsInByte { + addr: start_addr, + bit_start: start_bit as usize, + bit_end: 8usize, + }) + } else { + // The start is already aligned. No sub-byte range at the start. + false + } }; // update bytes in the middle let visit_middle = |v: &mut V| { - let start = start_addr + 1usize; + let start = if start_aligned { + start_addr + } else { + // If the start is not aligned, the whole-byte range starts after the first byte. + start_addr + 1usize + }; let end = end_addr; if start < end { // non-empty middle range @@ -137,11 +150,16 @@ where // update bits in the last byte let visit_end = |v: &mut V| { - v(BitByteRange::BitsInByte { - addr: end_addr, - bit_start: 0usize, - bit_end: end_bit as usize, - }) + if !end_aligned { + v(BitByteRange::BitsInByte { + addr: end_addr, + bit_start: 0usize, + bit_end: end_bit as usize, + }) + } else { + // The end is aligned. No sub-byte range at the end. + false + } }; // Update each segments. @@ -151,3 +169,204 @@ where visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) } } + +#[cfg(test)] +mod tests { + use crate::util::constants::BITS_IN_BYTE; + + use super::*; + + fn mk_addr(addr: usize) -> Address { + unsafe { Address::from_usize(addr) } + } + + fn break_bit_range_wrapped( + start_addr: Address, + start_bit: usize, + end_addr: Address, + end_bit: usize, + ) -> Vec { + let mut vec = vec![]; + break_bit_range( + start_addr, + start_bit as u8, + end_addr, + end_bit as u8, + true, + &mut |range| { + vec.push(range); + false + }, + ); + vec + } + + #[test] + fn test_empty_range() { + let base = mk_addr(0x1000); + for bit in 0..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit, base, bit); + assert!( + result.is_empty(), + "Not empty. bit: {bit}, result: {result:?}" + ); + } + } + + #[test] + fn test_subbyte_range() { + let base = mk_addr(0x1000); + for bit0 in 0..BITS_IN_BYTE { + for bit1 in (bit0 + 1)..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base, bit1); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0, + bit_end: bit1 + }], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_end_byte_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, 0); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0, + bit_end: BITS_IN_BYTE + }], + "Not equal. bit0: {bit0}", + ); + } + } + + #[test] + fn test_adjacent_grain_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0, + bit_end: BITS_IN_BYTE, + }, + BitByteRange::BitsInByte { + addr: base + 1usize, + bit_start: 0, + bit_end: bit1, + }, + ], + "Not equal. 
bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_left_and_whole_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for byte1 in 2usize..8 { + let result = break_bit_range_wrapped(base, bit0, base + byte1, 0); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0, + bit_end: BITS_IN_BYTE, + }, + BitByteRange::Bytes { + start: base + 1usize, + end: base + byte1, + }, + ], + "Not equal. bit0: {bit0}, byte1: {byte1}", + ); + } + } + } + + #[test] + fn test_whole_and_right_range() { + let base = mk_addr(0x1000); + for byte0 in 1..8 { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base - byte0, 0, base, bit1); + assert_eq!( + result, + vec![ + BitByteRange::Bytes { + start: base - byte0, + end: base, + }, + BitByteRange::BitsInByte { + addr: base, + bit_start: 0, + bit_end: bit1, + }, + ], + "Not equal. byte0: {byte0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_whole_range() { + let base = mk_addr(0x1000); + let result = break_bit_range_wrapped(base, 0, base + 42usize, 0); + assert_eq!( + result, + vec![BitByteRange::Bytes { + start: base, + end: base + 42usize, + },], + ); + } + + #[test] + fn test_left_whole_right_range() { + let base0 = mk_addr(0x1000); + let base1 = mk_addr(0x2000); + + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base0 - 1usize, bit0, base1, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base0 - 1usize, + bit_start: bit0, + bit_end: BITS_IN_BYTE, + }, + BitByteRange::Bytes { + start: base0, + end: base1, + }, + BitByteRange::BitsInByte { + addr: base1, + bit_start: 0, + bit_end: bit1, + }, + ], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } +} From c3c3dfa8a9d47d6eb88d86a7fc1970aa973927d6 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 2 Aug 2024 15:05:20 +0800 Subject: [PATCH 11/28] Just use u8 for the type of bit offset. --- src/util/metadata/side_metadata/global.rs | 7 +-- src/util/metadata/side_metadata/ranges.rs | 61 ++++++++++------------- 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index fd701b92c2..97c961d915 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -1110,11 +1110,8 @@ impl SideMetadataSpec { bit_start, bit_end, } => { - match helpers::find_last_non_zero_bit_in_metadata_bits( - addr, - bit_start as u8, - bit_end as u8, - ) { + match helpers::find_last_non_zero_bit_in_metadata_bits(addr, bit_start, bit_end) + { helpers::FindMetaBitResult::Found { addr, bit } => { res = Some(contiguous_meta_address_to_address(self, addr, bit)); // Return true to abort the search. We found the bit. diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs index 9c019da9bf..93fb9b685c 100644 --- a/src/util/metadata/side_metadata/ranges.rs +++ b/src/util/metadata/side_metadata/ranges.rs @@ -2,11 +2,8 @@ use crate::util::Address; -/// The type for bit offset in a byte, word or a SIMD vector. -/// -/// We use usize because it is generic and we may use AVX-512 some day, where u8 (256 max) is not -/// big enough. -pub type BitOffset = usize; +/// The type for bit offset in a byte. +pub type BitOffset = u8; /// A range of bytes or bits within a byte. It is the unit of visiting a contiguous bit range of a /// side metadata. 
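///
/// For example, bulk-clearing the VO bits of a whole block may yield a single `Bytes` range,
/// while clearing the VO bits of one small object in the middle of a metadata byte may yield a
/// single `BitsInByte` range.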
@@ -57,23 +54,15 @@ pub enum BitByteRange { /// early. pub fn break_bit_range( start_addr: Address, - start_bit: u8, + start_bit: BitOffset, end_addr: Address, - end_bit: u8, + end_bit: BitOffset, forwards: bool, visitor: &mut V, ) -> bool where V: FnMut(BitByteRange) -> bool, { - trace!( - "iterate_meta_bits: {} {}, {} {}", - start_addr, - start_bit, - end_addr, - end_bit - ); - // The start and the end are the same, we don't need to do anything. if start_addr == end_addr && start_bit == end_bit { return false; @@ -92,8 +81,8 @@ where if start_addr == end_addr { return visitor(BitByteRange::BitsInByte { addr: start_addr, - bit_start: start_bit as usize, - bit_end: end_bit as usize, + bit_start: start_bit, + bit_end: end_bit, }); } @@ -102,8 +91,8 @@ where if start_addr + 1usize == end_addr && end_bit == 0 { return visitor(BitByteRange::BitsInByte { addr: start_addr, - bit_start: start_bit as usize, - bit_end: 8usize, + bit_start: start_bit, + bit_end: 8_u8, }); } @@ -121,8 +110,8 @@ where if !start_aligned { v(BitByteRange::BitsInByte { addr: start_addr, - bit_start: start_bit as usize, - bit_end: 8usize, + bit_start: start_bit, + bit_end: 8_u8, }) } else { // The start is already aligned. No sub-byte range at the start. @@ -153,8 +142,8 @@ where if !end_aligned { v(BitByteRange::BitsInByte { addr: end_addr, - bit_start: 0usize, - bit_end: end_bit as usize, + bit_start: 0_u8, + bit_end: end_bit, }) } else { // The end is aligned. No sub-byte range at the end. @@ -223,8 +212,8 @@ mod tests { result, vec![BitByteRange::BitsInByte { addr: base, - bit_start: bit0, - bit_end: bit1 + bit_start: bit0 as u8, + bit_end: bit1 as u8 }], "Not equal. bit0: {bit0}, bit1: {bit1}", ); @@ -241,8 +230,8 @@ mod tests { result, vec![BitByteRange::BitsInByte { addr: base, - bit_start: bit0, - bit_end: BITS_IN_BYTE + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8 }], "Not equal. bit0: {bit0}", ); @@ -260,13 +249,13 @@ mod tests { vec![ BitByteRange::BitsInByte { addr: base, - bit_start: bit0, - bit_end: BITS_IN_BYTE, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, }, BitByteRange::BitsInByte { addr: base + 1usize, bit_start: 0, - bit_end: bit1, + bit_end: bit1 as u8, }, ], "Not equal. bit0: {bit0}, bit1: {bit1}", @@ -286,8 +275,8 @@ mod tests { vec![ BitByteRange::BitsInByte { addr: base, - bit_start: bit0, - bit_end: BITS_IN_BYTE, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, }, BitByteRange::Bytes { start: base + 1usize, @@ -316,7 +305,7 @@ mod tests { BitByteRange::BitsInByte { addr: base, bit_start: 0, - bit_end: bit1, + bit_end: bit1 as u8, }, ], "Not equal. byte0: {byte0}, bit1: {bit1}", @@ -351,8 +340,8 @@ mod tests { vec![ BitByteRange::BitsInByte { addr: base0 - 1usize, - bit_start: bit0, - bit_end: BITS_IN_BYTE, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, }, BitByteRange::Bytes { start: base0, @@ -361,7 +350,7 @@ mod tests { BitByteRange::BitsInByte { addr: base1, bit_start: 0, - bit_end: bit1, + bit_end: bit1 as u8, }, ], "Not equal. 
bit0: {bit0}, bit1: {bit1}", From 3a4747c034ed5db89f2e3104642626f37df9971f Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 2 Aug 2024 15:41:47 +0800 Subject: [PATCH 12/28] Fix and comment --- src/util/metadata/side_metadata/global.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index cc2f551e43..0034eadc38 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -1138,6 +1138,12 @@ impl SideMetadataSpec { res.map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } + /// Search for data addresses that have non zero values in the side metadata. + /// + /// This function searches the side metadata for the data address range from `data_start_addr` + /// (inclusive) to `data_end_addr` (exclusive). + /// + /// `visit_data` is called for each data address that has non-zero side metadata. pub fn scan_non_zero_values( &self, data_start_addr: Address, @@ -1217,7 +1223,7 @@ impl SideMetadataSpec { false }; - Self::iterate_meta_bits( + ranges::break_bit_range( start_meta_addr, start_meta_shift, end_meta_addr, From 765da3b82c0cc99a720d62ed0bb5d21492689262 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 2 Aug 2024 15:42:25 +0800 Subject: [PATCH 13/28] Formatting --- src/policy/copyspace.rs | 2 +- src/util/object_enum.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index 80f4962584..84e875191e 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -6,11 +6,11 @@ use crate::policy::sft::SFT; use crate::policy::space::{CommonSpace, Space}; use crate::scheduler::GCWorker; use crate::util::alloc::allocator::AllocatorContext; -use crate::util::{copy::*, object_enum}; use crate::util::heap::{MonotonePageResource, PageResource}; use crate::util::metadata::{extract_side_metadata, MetadataSpec}; use crate::util::object_enum::ObjectEnumerator; use crate::util::object_forwarding; +use crate::util::{copy::*, object_enum}; use crate::util::{Address, ObjectReference}; use crate::vm::*; use libc::{mprotect, PROT_EXEC, PROT_NONE, PROT_READ, PROT_WRITE}; diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs index ba2da6a323..f2ce2bde8a 100644 --- a/src/util/object_enum.rs +++ b/src/util/object_enum.rs @@ -8,7 +8,10 @@ use super::{ heap::{ chunk_map::{ChunkMap, ChunkState}, MonotonePageResource, - }, linear_scan::Region, metadata::side_metadata::spec_defs::VO_BIT, Address, ObjectReference + }, + linear_scan::Region, + metadata::side_metadata::spec_defs::VO_BIT, + Address, ObjectReference, }; /// A trait for enumerating objects in spaces. From a73061ca1c93f505e3e4efebb41e5a830e8108b1 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 5 Aug 2024 10:31:52 +0800 Subject: [PATCH 14/28] Comments --- src/util/metadata/side_metadata/ranges.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs index 93fb9b685c..66d89209da 100644 --- a/src/util/metadata/side_metadata/ranges.rs +++ b/src/util/metadata/side_metadata/ranges.rs @@ -96,16 +96,16 @@ where }); } - // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start - // or the end. Try to break it into (at most) three sub-ranges. + // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start or + // the end. 
Try to break it into (at most) three sub-ranges. let start_aligned = start_bit == 0; let end_aligned = end_bit == 0; - // We cannot let multiple closures capture `visitor` mutably at the same time, so we - // pass the visitor in as `v` every time. + // We cannot let multiple closures capture `visitor` mutably at the same time, so we pass the + // visitor in as `v` every time. - // update bits in the first byte + // visit bits within the first byte let visit_start = |v: &mut V| { if !start_aligned { v(BitByteRange::BitsInByte { @@ -119,7 +119,7 @@ where } }; - // update bytes in the middle + // visit whole bytes in the middle let visit_middle = |v: &mut V| { let start = if start_aligned { start_addr @@ -129,15 +129,14 @@ where }; let end = end_addr; if start < end { - // non-empty middle range v(BitByteRange::Bytes { start, end }) } else { - // empty middle range + // There are no whole bytes in the middle. false } }; - // update bits in the last byte + // visit bits within the last byte let visit_end = |v: &mut V| { if !end_aligned { v(BitByteRange::BitsInByte { From 419655f7e9b2a072953e2a173b1d462dde61b302 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 5 Aug 2024 13:36:50 +0800 Subject: [PATCH 15/28] Minor changes --- src/util/metadata/side_metadata/global.rs | 48 ++++++++++++---------- src/util/metadata/side_metadata/helpers.rs | 15 +++---- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 0034eadc38..9107e5e6e1 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -1138,10 +1138,11 @@ impl SideMetadataSpec { res.map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } - /// Search for data addresses that have non zero values in the side metadata. + /// Search for data addresses that have non zero values in the side metadata. This method is + /// primarily used for heap traversal by scanning the VO bits. /// /// This function searches the side metadata for the data address range from `data_start_addr` - /// (inclusive) to `data_end_addr` (exclusive). + /// (inclusive) to `data_end_addr` (exclusive). The data address range must be fully mapped. /// /// `visit_data` is called for each data address that has non-zero side metadata. pub fn scan_non_zero_values( @@ -1150,19 +1151,27 @@ impl SideMetadataSpec { data_end_addr: Address, visit_data: impl FnMut(Address), ) { - if self.uses_contiguous_side_metadata() { - // Contiguous side metadata - self.scan_non_zero_values_fast::(data_start_addr, data_end_addr, visit_data); + if self.uses_contiguous_side_metadata() && self.log_num_of_bits == 0 { + // Contiguous one-bit-per-region side metadata + // TODO: VO bits is one-bit-per-word. But if we want to scan other metadata (such as + // the forwarding bits which has two bits per word), we will need to refactor the + // algorithm of `scan_non_zero_values_fast`. + self.scan_non_zero_values_fast(data_start_addr, data_end_addr, visit_data); } else { - // TODO: We should be able to optimize further for this case. However, we need to be careful that the side metadata - // is not contiguous, and we need to skip to the next chunk's side metadata when we search to a different chunk. - // This won't be used for VO bit, as VO bit is global and is always contiguous. So for now, I am not bothered to do it. - warn!("We are trying to search non zero bits in an discontiguous side metadata. 
The performance is slow, as MMTk does not optimize for this case."); + // TODO: VO bits are always contiguous. But if we want to scan other metadata, such as + // side mark bits, we need to refactor `bulk_update_metadata` to support `FnMut`, too, + // and use it to apply `scan_non_zero_values_fast` on each contiguous side metadata + // range. + warn!( + "We are trying to search for non zero bits in a discontiguous side metadata \ + or the metadata has more than one bit per region. \ + The performance is slow, as MMTk does not optimize for this case." + ); self.scan_non_zero_values_simple::(data_start_addr, data_end_addr, visit_data); } } - pub fn scan_non_zero_values_simple( + fn scan_non_zero_values_simple( &self, data_start_addr: Address, data_end_addr: Address, @@ -1182,13 +1191,14 @@ impl SideMetadataSpec { } } - pub fn scan_non_zero_values_fast( + fn scan_non_zero_values_fast( &self, data_start_addr: Address, data_end_addr: Address, mut visit_data: impl FnMut(Address), ) { debug_assert!(self.uses_contiguous_side_metadata()); + debug_assert_eq!(self.log_num_of_bits, 0); // Then figure out the start and end metadata address and bits. let start_meta_addr = address_to_contiguous_meta_address(self, data_start_addr); @@ -1196,13 +1206,11 @@ impl SideMetadataSpec { let end_meta_addr = address_to_contiguous_meta_address(self, data_end_addr); let end_meta_shift = meta_byte_lshift(self, data_end_addr); - let mut check_bytes = |range| { + let mut visitor = |range| { match range { BitByteRange::Bytes { start, end } => { helpers::scan_non_zero_bits_in_metadata_bytes(start, end, &mut |addr, bit| { - visit_data(helpers::contiguous_meta_address_to_address( - self, addr, bit as u8, - )); + visit_data(helpers::contiguous_meta_address_to_address(self, addr, bit)); }); } BitByteRange::BitsInByte { @@ -1211,12 +1219,10 @@ impl SideMetadataSpec { bit_end, } => helpers::scan_non_zero_bits_in_metadata_bits( addr, - bit_start as u32, - bit_end as u32, + bit_start, + bit_end, &mut |addr, bit| { - visit_data(helpers::contiguous_meta_address_to_address( - self, addr, bit as u8, - )); + visit_data(helpers::contiguous_meta_address_to_address(self, addr, bit)); }, ), } @@ -1229,7 +1235,7 @@ impl SideMetadataSpec { end_meta_addr, end_meta_shift, false, - &mut check_bytes, + &mut visitor, ); } } diff --git a/src/util/metadata/side_metadata/helpers.rs b/src/util/metadata/side_metadata/helpers.rs index 1ffab4bf34..9eb8823649 100644 --- a/src/util/metadata/side_metadata/helpers.rs +++ b/src/util/metadata/side_metadata/helpers.rs @@ -1,3 +1,4 @@ +use super::ranges::BitOffset; use super::SideMetadataSpec; use crate::util::constants::LOG_BYTES_IN_PAGE; use crate::util::constants::{BITS_IN_WORD, BYTES_IN_PAGE, LOG_BITS_IN_BYTE}; @@ -272,7 +273,7 @@ fn find_last_non_zero_bit_in_u8(byte_value: u8) -> Option { pub fn scan_non_zero_bits_in_metadata_bytes( meta_start: Address, meta_end: Address, - visit_bit: &mut impl FnMut(Address, u32), + visit_bit: &mut impl FnMut(Address, BitOffset), ) { use crate::util::constants::BYTES_IN_ADDRESS; @@ -296,23 +297,23 @@ pub fn scan_non_zero_bits_in_metadata_bytes( } } -pub fn scan_non_zero_bits_in_metadata_word( +fn scan_non_zero_bits_in_metadata_word( meta_addr: Address, mut word: usize, - visit_bit: &mut impl FnMut(Address, u32), + visit_bit: &mut impl FnMut(Address, BitOffset), ) { while word != 0 { let bit = word.trailing_zeros(); - visit_bit(meta_addr, bit); + visit_bit(meta_addr, bit as u8); word = word & (word - 1); } } pub fn scan_non_zero_bits_in_metadata_bits( meta_addr: Address, 
- bit_start: u32, - bit_end: u32, - visit_bit: &mut impl FnMut(Address, u32), + bit_start: BitOffset, + bit_end: BitOffset, + visit_bit: &mut impl FnMut(Address, BitOffset), ) { let byte = unsafe { meta_addr.load::() }; for bit in bit_start..bit_end { From 6eb3073eafaa0f460257da511bf753ed7847bdf6 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 5 Aug 2024 19:10:07 +0800 Subject: [PATCH 16/28] Mock test for heap traversal --- .../mock_tests/mock_test_heap_traversal.rs | 87 +++++++++++++++++++ src/vm/tests/mock_tests/mod.rs | 2 + 2 files changed, 89 insertions(+) create mode 100644 src/vm/tests/mock_tests/mock_test_heap_traversal.rs diff --git a/src/vm/tests/mock_tests/mock_test_heap_traversal.rs b/src/vm/tests/mock_tests/mock_test_heap_traversal.rs new file mode 100644 index 0000000000..430c46ff2a --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_heap_traversal.rs @@ -0,0 +1,87 @@ +// GITHUB-CI: MMTK_PLAN=NoGC,MarkSweep,MarkCompact,SemiSpace,Immix +// GITHUB-CI: FEATURES=vo_bit + +// Note on the plans chosen for CI: +// - Those plans cover the MarkSweepSpace, MarkCompactSpace, CopySpace and ImmixSpace. +// Each plan other than NoGC also include ImmortalSpace and LOS. +// - PageProtect consumes too much memory and the test will fail with the default heap size +// chosen by the MutatorFixture. + +use std::collections::HashSet; + +use constants::BYTES_IN_WORD; + +use super::mock_test_prelude::*; + +use crate::{util::*, AllocationSemantics, MMTK}; + +lazy_static! { + static ref FIXTURE: Fixture = Fixture::new(); +} + +pub fn get_all_objects(mmtk: &'static MMTK) -> HashSet { + let mut result = HashSet::new(); + mmtk.enumerate_objects(|object| { + result.insert(object); + }); + result +} + +#[test] +pub fn test_heap_traversal() { + with_mockvm( + default_setup, + || { + FIXTURE.with_fixture_mut(|fixture| { + let mmtk = fixture.mmtk(); + let traversal0 = get_all_objects(mmtk); + assert!(traversal0.is_empty()); + + let mutator = &mut fixture.mutator; + + let align = BYTES_IN_WORD; + + let mut new_obj = |size: usize, semantics: AllocationSemantics| { + let start = memory_manager::alloc(mutator, size, align, 0, semantics); + let object = MockVM::object_start_to_ref(start); + memory_manager::post_alloc(mutator, object, size, semantics); + object + }; + + let mut known_objects = HashSet::new(); + + let mut new_and_assert = |size: usize, semantics: AllocationSemantics| { + let object = new_obj(size, semantics); // a random size + known_objects.insert(object); + let traversal = get_all_objects(mmtk); + assert_eq!(traversal, known_objects); + }; + + { + use AllocationSemantics::*; + + // Add some single objects. Size doesn't matter. + new_and_assert(40, Default); + new_and_assert(64, Default); + new_and_assert(96, Immortal); + new_and_assert(131000, Los); + + // Allocate enough memory to fill up a 64KB Immix block + for _ in 0..1000 { + new_and_assert(112, Default); + } + // Allocate a few objects in the immortal space. + for _ in 0..10 { + new_and_assert(64, Immortal); + } + // And a few objects in the LOS. + for _ in 0..3 { + // The MutatorFixture only has 1MB of memory. Don't allocate too much. 
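+                    // Three objects of 65504 bytes each are about 192 KiB in total, which fits.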
+                    new_and_assert(65504, Los);
+                }
+            }
+        });
+        },
+        no_cleanup,
+    )
+}
diff --git a/src/vm/tests/mock_tests/mod.rs b/src/vm/tests/mock_tests/mod.rs
index 751bc58c08..114d8f1859 100644
--- a/src/vm/tests/mock_tests/mod.rs
+++ b/src/vm/tests/mock_tests/mod.rs
@@ -35,6 +35,8 @@ mod mock_test_conservatism;
 #[cfg(target_os = "linux")]
 mod mock_test_handle_mmap_conflict;
 mod mock_test_handle_mmap_oom;
+#[cfg(feature = "vo_bit")]
+mod mock_test_heap_traversal;
 mod mock_test_init_fork;
 #[cfg(feature = "is_mmtk_object")]
 mod mock_test_internal_ptr_before_object_ref;

From f3c80aff16b5beeb737f724e02a0d0f41658654f Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Mon, 5 Aug 2024 20:57:35 +0800
Subject: [PATCH 17/28] Add benchmark for bitmap scanning

---
 Cargo.toml                                |  1 +
 benches/bulk_meta/bscan.rs                | 88 +++++++++++++++++++++++
 benches/bulk_meta/mod.rs                  |  1 +
 benches/main.rs                           |  5 +-
 src/util/metadata/side_metadata/global.rs |  6 +-
 src/util/metadata/side_metadata/mod.rs    |  6 ++
 src/util/object_enum.rs                   |  2 +-
 7 files changed, 104 insertions(+), 5 deletions(-)
 create mode 100644 benches/bulk_meta/bscan.rs

diff --git a/Cargo.toml b/Cargo.toml
index d039991ebd..36a353dea4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,6 +55,7 @@ sysinfo = "0.30.9"
 [dev-dependencies]
 paste = "1.0.8"
 rand = "0.8.5"
+rand_chacha = "0.3.1"
 criterion = "0.5.1"
 
 [build-dependencies]
diff --git a/benches/bulk_meta/bscan.rs b/benches/bulk_meta/bscan.rs
new file mode 100644
index 0000000000..0f3944ccfe
--- /dev/null
+++ b/benches/bulk_meta/bscan.rs
@@ -0,0 +1,88 @@
+//! Benchmarks for scanning side metadata for non-zero bits.
+
+use criterion::Criterion;
+use mmtk::util::{
+    constants::LOG_BITS_IN_WORD,
+    metadata::side_metadata::bench::scan_non_zero_bits_in_metadata_bytes, Address,
+};
+use rand::{seq::IteratorRandom, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+
+fn allocate_aligned(size: usize) -> Address {
+    let ptr = unsafe {
+        std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap())
+    };
+    Address::from_mut_ptr(ptr)
+}
+
+const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
+
+// Assume one-bit-per-word metadata (matching VO bits).
+const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD;
+
+/// Set this many distinct bits in the bitmap.
+const NUM_OBJECTS: usize = 200;
+
+/// Get a deterministic seeded Rng.
+fn get_rng() -> ChaCha8Rng {
+    // Create an Rng from a seed and an explicit Rng type.
+    // Not secure at all, but completely deterministic and reproducible.
+    // The following seed is read from /dev/random.
+    const SEED64: u64 = 0x4050cb1b5ab26c70;
+    ChaCha8Rng::seed_from_u64(SEED64)
+}
+
+/// A bitmap, with the known location of each bit for assertion.
+struct PreparedBitmap {
+    start: Address,
+    end: Address,
+    set_bits: Vec<(Address, u8)>,
+}
+
+/// Make a bitmap of the desired size and set bits.
+fn make_standard_bitmap() -> PreparedBitmap {
+    let start = allocate_aligned(BLOCK_META_BYTES);
+    let end = start + BLOCK_META_BYTES;
+    let mut rng = get_rng();
+
+    let mut set_bits = (0..(BLOCK_BYTES >> LOG_BITS_IN_WORD))
+        .choose_multiple(&mut rng, NUM_OBJECTS)
+        .iter()
+        .map(|total_bit_offset| {
+            let word_offset = total_bit_offset >> LOG_BITS_IN_WORD;
+            let bit_offset = total_bit_offset & ((1 << LOG_BITS_IN_WORD) - 1);
+            (start + (word_offset << LOG_BITS_IN_WORD), bit_offset as u8)
+        })
+        .collect::<Vec<_>>();
+
+    set_bits.sort();
+
+    for (addr, bit) in set_bits.iter() {
+        let word = unsafe { addr.load::<usize>() };
+        let new_word = word | (1 << bit);
+        unsafe { addr.store::<usize>(new_word) };
+    }
+
+    PreparedBitmap {
+        start,
+        end,
+        set_bits,
+    }
+}
+
+pub fn bench(c: &mut Criterion) {
+    c.bench_function("bscan_block", |b| {
+        let bitmap = make_standard_bitmap();
+        let mut holder: Vec<(Address, u8)> = Vec::with_capacity(NUM_OBJECTS);
+
+        b.iter(|| {
+            holder.clear();
+            scan_non_zero_bits_in_metadata_bytes(bitmap.start, bitmap.end, &mut |addr, shift| {
+                holder.push((addr, shift));
+            });
+        });
+
+        assert_eq!(holder.len(), NUM_OBJECTS);
+        assert_eq!(holder, bitmap.set_bits);
+    });
+}
diff --git a/benches/bulk_meta/mod.rs b/benches/bulk_meta/mod.rs
index 1b052d613f..9292ac806c 100644
--- a/benches/bulk_meta/mod.rs
+++ b/benches/bulk_meta/mod.rs
@@ -1 +1,2 @@
+pub mod bscan;
 pub mod bzero_bset;
diff --git a/benches/main.rs b/benches/main.rs
index b982b2d322..a25de7008d 100644
--- a/benches/main.rs
+++ b/benches/main.rs
@@ -46,7 +46,10 @@ pub fn bench_main(_c: &mut Criterion) {
     // Some benchmarks rely on the "bench" feature to expose some private functions.
     // Run them with `cargo bench --features bench`.
     #[cfg(feature = "bench")]
-    bulk_meta::bzero_bset::bench(_c);
+    {
+        bulk_meta::bzero_bset::bench(_c);
+        bulk_meta::bscan::bench(_c);
+    }
 }
 
 criterion_group!(benches, bench_main);
diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs
index 9107e5e6e1..d3418013ca 100644
--- a/src/util/metadata/side_metadata/global.rs
+++ b/src/util/metadata/side_metadata/global.rs
@@ -1149,7 +1149,7 @@ impl SideMetadataSpec {
         &self,
         data_start_addr: Address,
         data_end_addr: Address,
-        visit_data: impl FnMut(Address),
+        visit_data: &mut impl FnMut(Address),
     ) {
@@ -1175,7 +1175,7 @@
         &self,
         data_start_addr: Address,
         data_end_addr: Address,
-        mut visit_data: impl FnMut(Address),
+        visit_data: &mut impl FnMut(Address),
     ) {
         let region_bytes = 1usize << self.log_bytes_in_region;
@@ -1195,7 +1195,7 @@
         &self,
         data_start_addr: Address,
         data_end_addr: Address,
-        mut visit_data: impl FnMut(Address),
+        visit_data: &mut impl FnMut(Address),
     ) {
         debug_assert!(self.uses_contiguous_side_metadata());
         debug_assert_eq!(self.log_num_of_bits, 0);
diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs
index 612c69223f..ee88a0bd25 100644
--- a/src/util/metadata/side_metadata/mod.rs
+++ b/src/util/metadata/side_metadata/mod.rs
@@ -22,3 +22,9 @@ pub(crate) use helpers::*;
 #[allow(unused_imports)]
 pub(crate) use helpers_32::*;
 pub(crate) use sanity::SideMetadataSanity;
+
+/// Exposed methods for benchmarking.
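+///
+/// These helpers are re-exported only when the `bench` feature is enabled, so they do not become
+/// part of the crate's regular public API.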
+#[cfg(feature = "bench")] +pub mod bench { + pub use super::helpers::scan_non_zero_bits_in_metadata_bytes; +} diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs index f2ce2bde8a..8d383cf2a8 100644 --- a/src/util/object_enum.rs +++ b/src/util/object_enum.rs @@ -62,7 +62,7 @@ where } fn visit_address_range(&mut self, addr_range: std::ops::Range
<Address>) {
-        VO_BIT.scan_non_zero_values::<u8>(addr_range.start, addr_range.end, |address| {
+        VO_BIT.scan_non_zero_values::<u8>(addr_range.start, addr_range.end, &mut |address| {
             let object = ObjectReference::from_address::<VM>(address);
             (self.object_callback)(object);
         })

From 323d39eb69d045f947eb94ab8945916ffcc56985 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 6 Aug 2024 13:46:41 +0800
Subject: [PATCH 18/28] Revert criterion version update

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index d039991ebd..55bffbfc84 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,7 +55,7 @@ sysinfo = "0.30.9"
 [dev-dependencies]
 paste = "1.0.8"
 rand = "0.8.5"
-criterion = "0.5.1"
+criterion = "0.4"
 
 [build-dependencies]
 built = { version = "0.7.1", features = ["git2"] }

From 95cd6df8c6d9e89a7ce9054ac1bee6984cb8cc9e Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 6 Aug 2024 14:14:37 +0800
Subject: [PATCH 19/28] Fix comments

---
 src/util/metadata/side_metadata/ranges.rs | 29 +++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs
index 66d89209da..48d12a7068 100644
--- a/src/util/metadata/side_metadata/ranges.rs
+++ b/src/util/metadata/side_metadata/ranges.rs
@@ -1,4 +1,14 @@
-//! Data types for visiting metadata ranges at different granularities
+//! Data types for visiting metadata ranges at different granularities.
+//!
+//! Currently, the `break_bit_range` function can break a bit range into sub-ranges of whole bytes
+//! and in-byte bits.
+//!
+//! TODO:
+//!
+//! -   Add a function to break a byte range into sub-ranges of whole words and in-word bytes.
+//!     -   And use it for searching side metadata for non-zero bits.
+//! -   Add a function to break a byte range at chunk boundaries.
+//!     -   And use it for visiting discontiguous side metadata in bulk.
 
 use crate::util::Address;
 
@@ -40,15 +50,14 @@ pub enum BitByteRange {
 /// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal
 /// with visiting only a bit range in a metadata byte.
 ///
-/// The bit range starts at the bit at index `meta_start_bit` in the byte at address
-/// `meta_start_addr`, and ends at (but does not include) the bit at index `meta_end_bit` in the
-/// byte at address `meta_end_addr`.
+/// The bit range starts at the bit at index `start_bit` in the byte at address `start_addr`, and
+/// ends at (but does not include) the bit at index `end_bit` in the byte at address `end_addr`.
 ///
 /// Arguments:
-/// * `forwards`: If true, we iterate forwards (from start/low address to end/high address).
-/// Otherwise, we iterate backwards (from end/high address to start/low address).
-/// * `visitor`: The callback that visits ranges of bits or bytes. It returns true if the
-/// iteration should be terminated early.
+/// * `forwards`: If true, we iterate forwards (from start/low address to end/high address).
+///   Otherwise, we iterate backwards (from end/high address to start/low address).
+/// * `visitor`: The callback that visits ranges of bits or bytes. It returns true if the
+///   iteration should be terminated early.
 ///
 /// Returns true if the visitor terminated the iteration early. Returns false if we iterated
 /// through every bit in the range.
@@ -68,7 +77,7 @@ where
         return false;
     }
 
-    // If the range is already byte-aligned, visit whole bits.
+    // If the range is already byte-aligned, visit the entire range as whole bytes.
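+    // Visiting whole bytes as a single `Bytes` range lets callers process the metadata with
+    // word-sized loads instead of visiting bits one at a time.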
if start_bit == 0 && end_bit == 0 { return visitor(BitByteRange::Bytes { start: start_addr, @@ -150,7 +159,7 @@ where } }; - // Update each segments. + // Visit the three segments forwards or backwards. if forwards { visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) } else { From 8b91a21c30be63419490853f49a50e66eaf53486 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 6 Aug 2024 14:36:37 +0800 Subject: [PATCH 20/28] Support VMSpace --- src/policy/vmspace.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs index 5d38a77042..6a9f7902c3 100644 --- a/src/policy/vmspace.rs +++ b/src/policy/vmspace.rs @@ -11,6 +11,7 @@ use crate::util::heap::PageResource; use crate::util::metadata::mark_bit::MarkState; #[cfg(feature = "set_unlog_bits_vm_space")] use crate::util::metadata::MetadataSpec; +use crate::util::object_enum::ObjectEnumerator; use crate::util::opaque_pointer::*; use crate::util::ObjectReference; use crate::vm::{ObjectModel, VMBinding}; @@ -145,6 +146,13 @@ impl Space for VMSpace { // mmapped by the runtime rather than us). So we we use SFT here. SFT_MAP.get_checked(start).name() == self.name() } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + let external_pages = self.pr.get_external_pages(); + for ep in external_pages.iter() { + enumerator.visit_address_range(ep.start..ep.end); + } + } } use crate::scheduler::GCWorker; From 70e893387c9fd292c2aab29e7e681234f425fe6b Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 6 Aug 2024 15:12:49 +0800 Subject: [PATCH 21/28] Renamed to Space::enumerate_object_coarse --- src/mmtk.rs | 2 +- src/policy/copyspace.rs | 2 +- src/policy/immix/immixspace.rs | 2 +- src/policy/immortalspace.rs | 2 +- src/policy/largeobjectspace.rs | 2 +- src/policy/lockfreeimmortalspace.rs | 2 +- src/policy/markcompactspace.rs | 2 +- src/policy/marksweepspace/malloc_ms/global.rs | 2 +- src/policy/marksweepspace/native_ms/global.rs | 2 +- src/policy/space.rs | 28 +++++++++++++------ src/policy/vmspace.rs | 2 +- 11 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index d3d7f353f7..30f1c2bcfc 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -482,7 +482,7 @@ impl MMTK { let mut enumerator = object_enum::ClosureObjectEnumerator::<_, VM>::new(f); let plan = self.get_plan(); plan.for_each_space(&mut |space| { - space.enumerate_objects(&mut enumerator); + space.enumerate_objects_coarse(&mut enumerator); }) } } diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index 84e875191e..00cd632b5c 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -135,7 +135,7 @@ impl Space for CopySpace { self.common.copy = semantics; } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index 0e3d303202..a419a9bb28 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -191,7 +191,7 @@ impl Space for ImmixSpace { panic!("We do not use SFT to trace objects for Immix. 
set_copy_context() cannot be used.") } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); } } diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index 2ebb14e4d4..b686429b5e 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -114,7 +114,7 @@ impl Space for ImmortalSpace { panic!("immortalspace only releases pages enmasse") } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index 17427cab9b..c5236cfa51 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -177,7 +177,7 @@ impl Space for LargeObjectSpace { self.pr.release_pages(start); } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { self.treadmill.enumerate_objects_coarse(enumerator); } } diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index 00a265cd15..f13daeb374 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -168,7 +168,7 @@ impl Space for LockFreeImmortalSpace { .verify_metadata_context(std::any::type_name::(), &self.metadata) } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { enumerator.visit_address_range(self.start..(self.start + self.total_bytes)); } } diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index bc0f5659c2..8f781f1ae1 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -133,7 +133,7 @@ impl Space for MarkCompactSpace { panic!("markcompactspace only releases pages enmasse") } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/marksweepspace/malloc_ms/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs index dbe386db5c..621a0d7993 100644 --- a/src/policy/marksweepspace/malloc_ms/global.rs +++ b/src/policy/marksweepspace/malloc_ms/global.rs @@ -231,7 +231,7 @@ impl Space for MallocSpace { .verify_metadata_context(std::any::type_name::(), &self.metadata) } - fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, _enumerator: &mut dyn ObjectEnumerator) { unimplemented!() } } diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs index 4a86dda6fa..e3d7964513 100644 --- a/src/policy/marksweepspace/native_ms/global.rs +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -249,7 +249,7 @@ impl Space for MarkSweepSpace { todo!() } - fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); } } diff --git a/src/policy/space.rs b/src/policy/space.rs index eddc588c7c..3986c5ed58 100644 --- a/src/policy/space.rs 
+++ b/src/policy/space.rs
@@ -350,17 +350,27 @@ pub trait Space: 'static + SFT + Sync + Downcast {
             .verify_metadata_context(std::any::type_name::<Self>(), &self.common().metadata)
     }
 
-    /// Enumerate ranges of addresses that may contain objects. Used for enumerating objects in the
-    /// space. Implementors should call `f` for each address range that may contain objects, and
-    /// may ignore address ranges that are guaranteed not to include objects. The address range
-    /// will then be linearly scanned.
+    /// Enumerate objects at a coarser granularity.
     ///
-    /// # Implementation consideration
+    /// Implementers can use the `enumerator` to report
     ///
-    /// Because `Space` is a trait object type and `f` is a `dyn` reference, each invocation of `f`
-    /// involves a dynamic dispatching. The overhead is OK if we call it a block at a time, but it
-    /// will be too costly if we call a `dyn` callback for each object.
-    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator);
+    /// - individual objects within the space using `enumerator.visit_object`, and
+    /// - ranges of addresses that may contain objects using `enumerator.visit_address_range`. The
+    ///   caller will then enumerate objects in the range using the VO bits metadata.
+    ///
+    /// Each object in the space shall be covered by one of the two methods above.
+    ///
+    /// # Implementation considerations
+    ///
+    /// **Skipping empty ranges**: When enumerating address ranges, spaces can skip ranges (blocks,
+    /// chunks, etc.) that are guaranteed not to contain objects.
+    ///
+    /// **Dynamic dispatch**: Because `Space` is a trait object type and `enumerator` is a `dyn`
+    /// reference, invoking methods of `enumerator` involves dynamic dispatching. But the
+    /// overhead is OK if we call it a block at a time because scanning the VO bits will dominate
+    /// the execution time. For LOS, it will be cheaper to enumerate individual objects than to
+    /// scan the VO bits because the space is sparse.
+    fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator);
 }
 
 /// Print the VM map for a space.
diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs
index 6a9f7902c3..0443f13802 100644
--- a/src/policy/vmspace.rs
+++ b/src/policy/vmspace.rs
@@ -147,7 +147,7 @@ impl Space for VMSpace {
         SFT_MAP.get_checked(start).name() == self.name()
     }
 
-    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+    fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) {
         let external_pages = self.pr.get_external_pages();
         for ep in external_pages.iter() {
             enumerator.visit_address_range(ep.start..ep.end);
         }
     }
 }

From 4f6d1bd7341da9e2b45e214b5de689ba3b7debca Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 6 Aug 2024 15:50:00 +0800
Subject: [PATCH 22/28] Revert `Region::as_range()`.

Just use start and end for now.

In the future, we may introduce a proper `AddressRange` type and do a
larger-scale refactoring.
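One possible shape for such a type, as a hypothetical sketch (the name
and methods below are illustrative only, not part of this series):

    /// A half-open range of addresses, `[start, end)`.
    pub struct AddressRange {
        pub start: Address,
        pub end: Address,
    }

    impl AddressRange {
        /// The size of the range in bytes.
        pub fn bytes(&self) -> usize {
            self.end - self.start
        }
    }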
---
 src/policy/lockfreeimmortalspace.rs |  2 +-
 src/policy/vmspace.rs               |  2 +-
 src/util/linear_scan.rs             |  4 ----
 src/util/object_enum.rs             | 10 +++++-----
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs
index f13daeb374..25faa4bbfc 100644
--- a/src/policy/lockfreeimmortalspace.rs
+++ b/src/policy/lockfreeimmortalspace.rs
@@ -169,7 +169,7 @@ impl Space for LockFreeImmortalSpace {
     }
 
     fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) {
-        enumerator.visit_address_range(self.start..(self.start + self.total_bytes));
+        enumerator.visit_address_range(self.start, self.start + self.total_bytes);
     }
 }

diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs
index 0443f13802..5eadb77cfc 100644
--- a/src/policy/vmspace.rs
+++ b/src/policy/vmspace.rs
@@ -150,7 +150,7 @@ impl Space for VMSpace {
     fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) {
         let external_pages = self.pr.get_external_pages();
         for ep in external_pages.iter() {
-            enumerator.visit_address_range(ep.start..ep.end);
+            enumerator.visit_address_range(ep.start, ep.end);
         }
     }
 }

diff --git a/src/util/linear_scan.rs b/src/util/linear_scan.rs
index 6b144ad5db..bf391c785d 100644
--- a/src/util/linear_scan.rs
+++ b/src/util/linear_scan.rs
@@ -108,10 +108,6 @@ pub trait Region: Copy + PartialEq + PartialOrd {
     fn end(&self) -> Address {
         self.start() + Self::BYTES
     }
-    /// Return the address range of the region.
-    fn as_range(&self) -> std::ops::Range<Address> {
-        self.start()..self.end()
-    }
     /// Return the next region after this one.
     fn next(&self) -> Self {
         self.next_nth(1)
     }

diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs
index 8d383cf2a8..0d585c5fbb 100644
--- a/src/util/object_enum.rs
+++ b/src/util/object_enum.rs
@@ -26,7 +26,7 @@ pub trait ObjectEnumerator {
     /// Visit a single object.
     fn visit_object(&mut self, object: ObjectReference);
     /// Visit an address range that may contain objects.
-    fn visit_address_range(&mut self, addr_range: std::ops::Range<Address>);
+    fn visit_address_range(&mut self, start: Address, end: Address);
 }
 
 /// An implementation of `ObjectEnumerator` that wraps a callback.
@@ -61,8 +61,8 @@ where
         (self.object_callback)(object);
     }
 
-    fn visit_address_range(&mut self, addr_range: std::ops::Range<Address>) {
-        VO_BIT.scan_non_zero_values::<u8>(addr_range.start, addr_range.end, &mut |address| {
+    fn visit_address_range(&mut self, start: Address, end: Address) {
+        VO_BIT.scan_non_zero_values::<u8>(start, end, &mut |address| {
             let object = ObjectReference::from_address::<VM>(address);
             (self.object_callback)(object);
         })
@@ -93,7 +93,7 @@ pub(crate) fn enumerate_blocks_from_chunk_map(
         if chunk_map.get(chunk) == ChunkState::Allocated {
             for block in chunk.iter_region::<B>() {
                 if block.may_have_objects() {
-                    enumerator.visit_address_range(block.as_range());
+                    enumerator.visit_address_range(block.start(), block.end());
                 }
             }
         }
@@ -107,6 +107,6 @@ pub(crate) fn enumerate_blocks_from_monotonic_page_resource(
     VM: VMBinding,
 {
     for (start, size) in pr.iterate_allocated_regions() {
-        enumerator.visit_address_range(start..(start + size));
+        enumerator.visit_address_range(start, start + size);
     }
 }

From 557e7c4d27bc73a93f23cd7fa16b424c110ac134 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 7 Aug 2024 14:19:31 +0800
Subject: [PATCH 23/28] Remove the "bench" feature

It should have been removed while merging with master.
---
 Cargo.toml                                |  4 ----
 src/util/metadata/side_metadata/global.rs | 22 ----------------------
 2 files changed, 26 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ff236d39ad..3c2fc8f179 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -79,10 +79,6 @@ mock_test = ["test_private"]
 # This feature will expose some private functions for testings or benchmarking.
 test_private = []
 
-# This feature is only used for benchmarks.
-# It will expose some private functions for benchmarking.
-bench = []
-
 # .github/scripts/ci-common.sh extracts features from the following part (including from comments).
 # So be careful when editing or adding stuff to the section below.

diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs
index 4edede5adc..8f13f90ab7 100644
--- a/src/util/metadata/side_metadata/global.rs
+++ b/src/util/metadata/side_metadata/global.rs
@@ -192,17 +192,6 @@ impl SideMetadataSpec {
         );
     }
 
-    /// Expose `zero_meta_bits` when running `cargo bench`.
-    #[cfg(feature = "bench")]
-    pub fn bench_zero_meta_bits(
-        meta_start_addr: Address,
-        meta_start_bit: u8,
-        meta_end_addr: Address,
-        meta_end_bit: u8,
-    ) {
-        Self::zero_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit)
-    }
-
     /// This method is used for bulk setting side metadata for a data address range.
     pub(crate) fn set_meta_bits(
@@ -240,17 +229,6 @@ impl SideMetadataSpec {
         );
     }
 
-    /// Expose `set_meta_bits` when running `cargo bench`.
-    #[cfg(feature = "bench")]
-    pub fn bench_set_meta_bits(
-        meta_start_addr: Address,
-        meta_start_bit: u8,
-        meta_end_addr: Address,
-        meta_end_bit: u8,
-    ) {
-        Self::set_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit)
-    }
-
     /// This method does bulk update for the given data range. It calculates the metadata bits for the given data range,
     /// and invoke the given method to update the metadata bits.
    pub(super) fn bulk_update_metadata(

From ec194aa5c510aecf8c740919b9c052406e05e28b Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 7 Aug 2024 15:09:18 +0800
Subject: [PATCH 24/28] Comments

---
 src/mmtk.rs             | 28 +++++++++++++++++++++++++++-
 src/util/object_enum.rs |  9 ++-------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/mmtk.rs b/src/mmtk.rs
index 30f1c2bcfc..443a5e064c 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -471,7 +471,33 @@ impl MMTK {
     }
 
     /// Enumerate objects in all spaces in this MMTK instance.
-    /// The call-back function `f` is called for every object.
+    ///
+    /// The call-back function `f` is called for every object that has the valid object bit (VO
+    /// bit), i.e. objects that are allocated in the heap of this MMTK instance but have not been
+    /// reclaimed yet.
+    ///
+    /// # Interaction with allocation and GC
+    ///
+    /// This function does not mutate the heap. It is safe if multiple threads execute this
+    /// function concurrently during mutator time.
+    ///
+    /// This function will visit all objects that had been allocated at the time when it was
+    /// called. If new objects are allocated while this function is being executed, this function
+    /// may or may not visit objects allocated after it started.
+    ///
+    /// Also note that when this function visits an object, it only guarantees that its VO bit has
+    /// been set. It is not guaranteed that the object has been "fully initialized" in the sense
+    /// of the programming language the VM is implementing. For example, the object header and the
+    /// type information may not have been written.
+    ///
+    /// The behavior is *undefined* if GC happens while this function is being executed. The VM
+    /// binding must ensure GC does not start while executing this function. One way to prevent GC
+    /// from starting is to not let the current thread yield for GC.
+    ///
+    /// Some programming languages may provide an API that allows the user to allocate objects and
+    /// trigger GC while enumerating objects. The VM binding may use the callback of this function
+    /// to save all visited objects in an array and let the user visit the array after this function
+    /// returns.
    #[cfg(feature = "vo_bit")]
    pub fn enumerate_objects<F>(&self, f: F)
    where
diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs
index 0d585c5fbb..6aff124d47 100644
--- a/src/util/object_enum.rs
+++ b/src/util/object_enum.rs
@@ -14,14 +14,9 @@ use super::{
     Address, ObjectReference,
 };
 
-/// A trait for enumerating objects in spaces.
+/// A trait for enumerating objects in spaces, used by [`Space::enumerate_objects_coarse`].
 ///
-/// This is a trait object type, so we avoid using generics. Because this trait may be used as a
-/// `&mut dyn`, we avoid the cost of dynamic dispatching by allowing the user to supply an address
-/// range instead of a single object reference. The implementor of this trait will use linear
-/// scanning to find objects in the range in batch. But if the space is too sparse (e.g. LOS) and
-/// the cost of linear scanning is greater than the dynamic dispatching, use `visit_object`
-/// directly.
+/// [`Space::enumerate_objects_coarse`]: crate::policy::space::Space::enumerate_objects_coarse
 pub trait ObjectEnumerator {
     /// Visit a single object.
fn visit_object(&mut self, object: ObjectReference); From ce3c72344ddcb46ca87fc5020268eeb3c79a2915 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Aug 2024 14:53:36 +0800 Subject: [PATCH 25/28] Rename back to Space::enumerate_objects --- src/mmtk.rs | 2 +- src/policy/copyspace.rs | 2 +- src/policy/immix/immixspace.rs | 2 +- src/policy/immortalspace.rs | 2 +- src/policy/largeobjectspace.rs | 2 +- src/policy/lockfreeimmortalspace.rs | 2 +- src/policy/markcompactspace.rs | 2 +- src/policy/marksweepspace/malloc_ms/global.rs | 2 +- src/policy/marksweepspace/native_ms/global.rs | 2 +- src/policy/space.rs | 4 ++-- src/policy/vmspace.rs | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 443a5e064c..b3291424ea 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -508,7 +508,7 @@ impl MMTK { let mut enumerator = object_enum::ClosureObjectEnumerator::<_, VM>::new(f); let plan = self.get_plan(); plan.for_each_space(&mut |space| { - space.enumerate_objects_coarse(&mut enumerator); + space.enumerate_objects(&mut enumerator); }) } } diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index 00cd632b5c..84e875191e 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -135,7 +135,7 @@ impl Space for CopySpace { self.common.copy = semantics; } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index a419a9bb28..0e3d303202 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -191,7 +191,7 @@ impl Space for ImmixSpace { panic!("We do not use SFT to trace objects for Immix. 
set_copy_context() cannot be used.") } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); } } diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index b686429b5e..2ebb14e4d4 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -114,7 +114,7 @@ impl Space for ImmortalSpace { panic!("immortalspace only releases pages enmasse") } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index c5236cfa51..17427cab9b 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -177,7 +177,7 @@ impl Space for LargeObjectSpace { self.pr.release_pages(start); } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { self.treadmill.enumerate_objects_coarse(enumerator); } } diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index 25faa4bbfc..dee3205181 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -168,7 +168,7 @@ impl Space for LockFreeImmortalSpace { .verify_metadata_context(std::any::type_name::(), &self.metadata) } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { enumerator.visit_address_range(self.start, self.start + self.total_bytes); } } diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index 8f781f1ae1..bc0f5659c2 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -133,7 +133,7 @@ impl Space for MarkCompactSpace { panic!("markcompactspace only releases pages enmasse") } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); } } diff --git a/src/policy/marksweepspace/malloc_ms/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs index 621a0d7993..dbe386db5c 100644 --- a/src/policy/marksweepspace/malloc_ms/global.rs +++ b/src/policy/marksweepspace/malloc_ms/global.rs @@ -231,7 +231,7 @@ impl Space for MallocSpace { .verify_metadata_context(std::any::type_name::(), &self.metadata) } - fn enumerate_objects_coarse(&self, _enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) { unimplemented!() } } diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs index e3d7964513..4a86dda6fa 100644 --- a/src/policy/marksweepspace/native_ms/global.rs +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -249,7 +249,7 @@ impl Space for MarkSweepSpace { todo!() } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { object_enum::enumerate_blocks_from_chunk_map::(enumerator, &self.chunk_map); } } diff --git a/src/policy/space.rs b/src/policy/space.rs index 3986c5ed58..0329c995b7 100644 --- a/src/policy/space.rs +++ 
b/src/policy/space.rs @@ -350,7 +350,7 @@ pub trait Space: 'static + SFT + Sync + Downcast { .verify_metadata_context(std::any::type_name::(), &self.common().metadata) } - /// Enumerate objects at a coarser granularity. + /// Enumerate objects in the current space. /// /// Implementers can use the `enumerator` to report /// @@ -370,7 +370,7 @@ pub trait Space: 'static + SFT + Sync + Downcast { /// overhead is OK if we call it a block at a time because scanning the VO bits will dominate /// the execution time. For LOS, it will be cheaper to enumerate individual objects than /// scanning VO bits because it is sparse. - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator); + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator); } /// Print the VM map for a space. diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs index 5eadb77cfc..38fc6011da 100644 --- a/src/policy/vmspace.rs +++ b/src/policy/vmspace.rs @@ -147,7 +147,7 @@ impl Space for VMSpace { SFT_MAP.get_checked(start).name() == self.name() } - fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) { + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { let external_pages = self.pr.get_external_pages(); for ep in external_pages.iter() { enumerator.visit_address_range(ep.start, ep.end); From 81a9faba50ac83a637f2cce34f51a49aa8a5ae3b Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Aug 2024 15:11:03 +0800 Subject: [PATCH 26/28] Use vo_bit::get_object_ref_for_vo_addr --- src/util/metadata/side_metadata/global.rs | 4 +++- src/util/metadata/vo_bit/mod.rs | 2 +- src/util/object_enum.rs | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 8f13f90ab7..ad39a3d565 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -1122,7 +1122,9 @@ impl SideMetadataSpec { /// This function searches the side metadata for the data address range from `data_start_addr` /// (inclusive) to `data_end_addr` (exclusive). The data address range must be fully mapped. /// - /// `visit_data` is called for each data address that has non-zero side metadata. + /// For each data region that has non-zero side metadata, `visit_data` is called with the lowest + /// address of that region. Note that it may not be the original address used to set the + /// metadata bits. pub fn scan_non_zero_values( &self, data_start_addr: Address, diff --git a/src/util/metadata/vo_bit/mod.rs b/src/util/metadata/vo_bit/mod.rs index e7b0f2551b..3ae0aee8d7 100644 --- a/src/util/metadata/vo_bit/mod.rs +++ b/src/util/metadata/vo_bit/mod.rs @@ -201,7 +201,7 @@ fn get_in_object_address_for_potential_object(potential_obj: Addr } /// Get the object reference from an aligned address where VO bit is set. 
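 /// The conversion below undoes `IN_OBJECT_ADDRESS_OFFSET` and aligns the result up to
 /// `ObjectReference::ALIGNMENT`.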
-fn get_object_ref_for_vo_addr(vo_addr: Address) -> ObjectReference {
+pub(crate) fn get_object_ref_for_vo_addr(vo_addr: Address) -> ObjectReference {
     let addr = vo_addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET);
     let aligned = addr.align_up(ObjectReference::ALIGNMENT);
     unsafe { ObjectReference::from_raw_address_unchecked(aligned) }

diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs
index 6aff124d47..ff499af74a 100644
--- a/src/util/object_enum.rs
+++ b/src/util/object_enum.rs
@@ -10,7 +10,7 @@ use super::{
         MonotonePageResource,
     },
     linear_scan::Region,
-    metadata::side_metadata::spec_defs::VO_BIT,
+    metadata::{side_metadata::spec_defs::VO_BIT, vo_bit},
     Address, ObjectReference,
 };
 
@@ -58,7 +58,7 @@ where
 
     fn visit_address_range(&mut self, start: Address, end: Address) {
         VO_BIT.scan_non_zero_values::(start, end, &mut |address| {
-            let object = ObjectReference::from_address::(address);
+            let object = vo_bit::get_object_ref_for_vo_addr::(address);
             (self.object_callback)(object);
         })
     }

From d11aa54fba672303f9d66822402a35eae59cba07 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 8 Aug 2024 15:43:30 +0800
Subject: [PATCH 27/28] Update comments of MMTK::enumerate_objects

State that it is undefined behavior if it races with allocation or GC.
Add comments on saving a list of object references and keeping those
references alive in subsequent GCs.
---
 src/mmtk.rs | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/src/mmtk.rs b/src/mmtk.rs
index b3291424ea..f5f7622130 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -476,28 +476,36 @@ impl MMTK {
     /// bit), i.e. objects that are allocated in the heap of this MMTK instance, but have not been
     /// reclaimed yet.
     ///
+    /// # Notes about object initialization and finalization
+    ///
+    /// When this function visits an object, it only guarantees that its VO bit must have been set.
+    /// It is not guaranteed that the object has been "fully initialized" in the sense of the
+    /// programming language the VM is implementing. For example, the object header and the type
+    /// information may not have been written.
+    ///
+    /// It will also visit objects that have been "finalized" in the sense of the programming
+    /// language the VM is implementing, as long as the object has not been reclaimed by the GC
+    /// yet. Be careful. If the object header is destroyed, it may not be safe to access such
+    /// objects in the high-level language.
+    ///
     /// # Interaction with allocation and GC
     ///
     /// This function does not mutate the heap. It is safe if multiple threads execute this
     /// function concurrently during mutator time.
     ///
-    /// This function will visit all objects that have been allocated at the time when this function
-    /// is called. But if new objects are allocated while this function is being executed, this
-    /// function may or may not visit objects allocated after this function started.
-    ///
-    /// Also note that when this function visits an object, it only guarantees that its VO bit must
-    /// have been set. It is not guaranteed that the object has been "fully initialized" in the sense
-    /// of the programming language the VM is implementing. For example, the object header and the
-    /// type information may not have been written.
+    /// It is *undefined behavior* if allocation or GC happens while this function is being
+    /// executed. The VM binding must ensure no threads are allocating and GC does not start while
+    /// executing this function.
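+    /// Allocation sets the VO bit of new objects, and GC clears the VO bits of dead objects when
+    /// reclaiming them, so racing with either can make this function observe VO bits in
+    /// inconsistent, transient states.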
+    /// One way to do this is to stop all mutators before calling this function.
     ///
-    /// It is *undefined behavior* if GC happens while this function is being executed. The VM
-    /// binding must ensure GC does not start while executing this function. One way to prevent GC
-    /// from starting is not letting the current thread yield for GC.
+    /// Some high-level languages may provide an API that allows the user to allocate objects and
+    /// trigger GC while enumerating objects. One example is [`ObjectSpace::each_object`][os_eo] in
+    /// Ruby. The VM binding may use the callback of this function to save all visited object
+    /// references and let the user visit those references after this function returns. Make sure
+    /// those saved references are in the root set or in an object that will live through GCs until
+    /// the high-level language finishes visiting the saved object references.
     ///
-    /// Some programming languages may provide an API that allows the user to allocate objects and
-    /// trigger GC while enumerating objects. The VM binding may use the callback of this function
-    /// to save all visited objects in an array and let the user visit the array after this function
-    /// returns.
+    /// [os_eo]: https://docs.ruby-lang.org/en/master/ObjectSpace.html#method-c-each_object
     #[cfg(feature = "vo_bit")]
     pub fn enumerate_objects(&self, f: F)
     where

From 71302114c8299bd408839182b1b3a2872b886613 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 8 Aug 2024 17:28:11 +0800
Subject: [PATCH 28/28] Rename remaining enumerate_objects_coarse
---
 src/policy/largeobjectspace.rs | 2 +-
 src/util/object_enum.rs        | 4 ++--
 src/util/treadmill.rs          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs
index 17427cab9b..9d948201bb 100644
--- a/src/policy/largeobjectspace.rs
+++ b/src/policy/largeobjectspace.rs
@@ -178,7 +178,7 @@ impl Space for LargeObjectSpace {
     }
 
     fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
-        self.treadmill.enumerate_objects_coarse(enumerator);
+        self.treadmill.enumerate_objects(enumerator);
     }
 }

diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs
index ff499af74a..61378f7a57 100644
--- a/src/util/object_enum.rs
+++ b/src/util/object_enum.rs
@@ -14,9 +14,9 @@ use super::{
     Address, ObjectReference,
 };
 
-/// A trait for enumerating objects in spaces, used by [`Space::enumerate_objects_coarse`].
+/// A trait for enumerating objects in spaces, used by [`Space::enumerate_objects`].
 ///
-/// [`Space::enumerate_objects_coarse`]: crate::policy::space::Space::enumerate_objects_coarse
+/// [`Space::enumerate_objects`]: crate::policy::space::Space::enumerate_objects
 pub trait ObjectEnumerator {
     /// Visit a single object.
     fn visit_object(&mut self, object: ObjectReference);

diff --git a/src/util/treadmill.rs b/src/util/treadmill.rs
index cbc6aaf84a..96b5eb9e7d 100644
--- a/src/util/treadmill.rs
+++ b/src/util/treadmill.rs
@@ -102,7 +102,7 @@ impl TreadMill {
         }
     }
 
-    pub(crate) fn enumerate_objects_coarse(&self, enumerator: &mut dyn ObjectEnumerator) {
+    pub(crate) fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
         let mut visit_objects = |set: &Mutex>| {
             let set = set.lock().unwrap();
             for object in set.iter() {
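                // Note: the LOS is sparse, so the treadmill reports each object to the
                // enumerator individually instead of asking it to scan VO bits over address
                // ranges (see the comment on `Space::enumerate_objects`).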