From 77dec5d0f3caf813ff45a62bdca644f79c7fad1e Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 11 Jun 2024 16:06:59 +0800
Subject: [PATCH 1/4] Let MarkSweepSpace use BlockPageResource

Let MarkSweepSpace use BlockPageResource to eliminate the lock
contention when releasing pages in parallel.
---
 src/policy/marksweepspace/native_ms/block.rs  |  4 ++++
 src/policy/marksweepspace/native_ms/global.rs | 22 ++++++++++++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/policy/marksweepspace/native_ms/block.rs b/src/policy/marksweepspace/native_ms/block.rs
index d607f71b4e..6727430e9e 100644
--- a/src/policy/marksweepspace/native_ms/block.rs
+++ b/src/policy/marksweepspace/native_ms/block.rs
@@ -4,6 +4,7 @@ use atomic::Ordering;
 
 use super::BlockList;
 use super::MarkSweepSpace;
+use crate::util::constants::LOG_BYTES_IN_PAGE;
 use crate::util::heap::chunk_map::*;
 use crate::util::linear_scan::Region;
 use crate::vm::ObjectModel;
@@ -48,6 +49,9 @@ impl Region for Block {
 }
 
 impl Block {
+    /// Log pages in block
+    pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize;
+
     pub const METADATA_SPECS: [SideMetadataSpec; 7] = [
         Self::MARK_TABLE,
         Self::NEXT_BLOCK_TABLE,
diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs
index 274c05648a..46b5a0b4a6 100644
--- a/src/policy/marksweepspace/native_ms/global.rs
+++ b/src/policy/marksweepspace/native_ms/global.rs
@@ -7,7 +7,7 @@ use crate::{
     scheduler::{GCWorkScheduler, GCWorker},
     util::{
         copy::CopySemantics,
-        heap::{FreeListPageResource, PageResource},
+        heap::{BlockPageResource, PageResource},
         metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec},
         ObjectReference,
     },
@@ -64,7 +64,7 @@ pub enum BlockAcquireResult {
 /// | GC - End of GC | - | Merge the temp global lists | - |
 pub struct MarkSweepSpace<VM: VMBinding> {
     pub common: CommonSpace<VM>,
-    pr: FreeListPageResource<VM>,
+    pr: BlockPageResource<VM, Block>,
     /// Allocation status for all chunks in MS space
     chunk_map: ChunkMap,
     /// Work packet scheduler
@@ -80,6 +80,8 @@ pub struct MarkSweepSpace<VM: VMBinding> {
     abandoned_in_gc: Mutex<AbandonedBlockLists>,
 }
 
+unsafe impl<VM: VMBinding> Sync for MarkSweepSpace<VM> {}
+
 pub struct AbandonedBlockLists {
     pub available: BlockLists,
     pub unswept: BlockLists,
@@ -278,9 +280,19 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
         let common = CommonSpace::new(args.into_policy_args(false, false, local_specs));
         MarkSweepSpace {
             pr: if is_discontiguous {
-                FreeListPageResource::new_discontiguous(vm_map)
+                BlockPageResource::new_discontiguous(
+                    Block::LOG_PAGES,
+                    vm_map,
+                    scheduler.num_workers(),
+                )
             } else {
-                FreeListPageResource::new_contiguous(common.start, common.extent, vm_map)
+                BlockPageResource::new_contiguous(
+                    Block::LOG_PAGES,
+                    common.start,
+                    common.extent,
+                    vm_map,
+                    scheduler.num_workers(),
+                )
             },
             common,
             chunk_map: ChunkMap::new(),
@@ -354,7 +366,7 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
         self.block_clear_metadata(block);
 
         block.deinit();
-        self.pr.release_pages(block.start());
+        self.pr.release_block(block);
     }
 
     pub fn block_clear_metadata(&self, block: Block) {

From a053306837c0018c55a14bae3b4f85489d80d6a0 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 11 Jun 2024 20:51:52 +0800
Subject: [PATCH 2/4] Flush BlockPageResource at the end of GC

---
 src/policy/marksweepspace/native_ms/global.rs |  5 +++++
 src/scheduler/scheduler.rs                    | 10 ++++++----
 tools/tracing/README.md                       |  2 ++
 tools/tracing/timeline/capture.bt             | 12 ++++++++++++
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs
index 46b5a0b4a6..06540eb3f3 100644
--- a/src/policy/marksweepspace/native_ms/global.rs
+++ b/src/policy/marksweepspace/native_ms/global.rs
@@ -359,6 +359,11 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
 
         #[cfg(debug_assertions)]
         from.assert_empty();
+
+        // BlockPageResource uses worker-local block queues to eliminate contention when releasing
+        // blocks, similar to how the MarkSweepSpace caches blocks in `abandoned_in_gc` before
+        // returning them to the global pool.  We flush the BlockPageResource, too.
+        self.pr.flush_all();
     }
 
     /// Release a block.
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 967da81b6c..30143dc049 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -530,6 +530,12 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
         // Tell GC trigger that GC ended - this happens before we resume mutators.
         mmtk.gc_trigger.policy.on_gc_end(mmtk);
 
+        // All other workers are parked, so it is safe to access the Plan instance mutably.
+        probe!(mmtk, plan_end_of_gc_begin);
+        let plan_mut: &mut dyn Plan<VM = VM> = unsafe { mmtk.get_plan_mut() };
+        plan_mut.end_of_gc(worker.tls);
+        probe!(mmtk, plan_end_of_gc_end);
+
         // Compute the elapsed time of the GC.
         let start_time = {
             let mut gc_start_time = worker.mmtk.state.gc_start_time.borrow_mut();
@@ -565,10 +571,6 @@
             );
         }
 
-        // All other workers are parked, so it is safe to access the Plan instance mutably.
-        let plan_mut: &mut dyn Plan<VM = VM> = unsafe { mmtk.get_plan_mut() };
-        plan_mut.end_of_gc(worker.tls);
-
         #[cfg(feature = "extreme_assertions")]
         if crate::util::slot_logger::should_check_duplicate_slots(mmtk.get_plan()) {
             // reset the logging info at the end of each GC
diff --git a/tools/tracing/README.md b/tools/tracing/README.md
index 4af4ebe0dd..c80e492d13 100644
--- a/tools/tracing/README.md
+++ b/tools/tracing/README.md
@@ -38,6 +38,8 @@ Currently, the core provides the following tracepoints.
   argument is the length of the string.
 - `mmtk:alloc_slow_once_start()`: the allocation slow path starts.
 - `mmtk:alloc_slow_once_end()`: the allocation slow path ends.
+- `mmtk:plan_end_of_gc_begin()`: before executing `Plan::end_of_gc`.
+- `mmtk:plan_end_of_gc_end()`: after executing `Plan::end_of_gc`.
 
 ## Tracing tools

diff --git a/tools/tracing/timeline/capture.bt b/tools/tracing/timeline/capture.bt
index edd3057ea6..5315fdb3b7 100644
--- a/tools/tracing/timeline/capture.bt
+++ b/tools/tracing/timeline/capture.bt
@@ -79,4 +79,16 @@ usdt:$MMTK:mmtk:process_slots {
     }
 }
 
+usdt:$MMTK:mmtk:plan_end_of_gc_begin {
+    if (@enable_print) {
+        printf("plan_end_of_gc,B,%d,%lu\n", tid, nsecs);
+    }
+}
+
+usdt:$MMTK:mmtk:plan_end_of_gc_end {
+    if (@enable_print) {
+        printf("plan_end_of_gc,E,%d,%lu\n", tid, nsecs);
+    }
+}
+
 // vim: ft=bpftrace ts=4 sw=4 sts=4 et

From 44d8557e54f7699b8c50e303652de9dabe038c18 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 12 Jun 2024 13:13:18 +0800
Subject: [PATCH 3/4] Add comment

---
 src/util/heap/freelistpageresource.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/util/heap/freelistpageresource.rs b/src/util/heap/freelistpageresource.rs
index 1812464a12..5eeda9745f 100644
--- a/src/util/heap/freelistpageresource.rs
+++ b/src/util/heap/freelistpageresource.rs
@@ -320,6 +320,14 @@ impl<VM: VMBinding> FreeListPageResource<VM> {
         self.common.release_discontiguous_chunks(chunk);
     }
 
+    /// Release pages previously allocated by `alloc_pages`.
+    ///
+    /// Warning: This method acquires the mutex `self.sync`. If multiple threads release pages
+    /// concurrently, the lock contention will become a performance bottleneck. This is especially
+    /// problematic for plans that sweep objects in bulk in the `Release` stage. Spaces other than
+    /// the large object space are recommended to use [`BlockPageResource`] whenever possible.
+    ///
+    /// [`BlockPageResource`]: crate::util::heap::blockpageresource::BlockPageResource
     pub fn release_pages(&self, first: Address) {
         debug_assert!(conversions::is_page_aligned(first));
         let mut sync = self.sync.lock().unwrap();

From 85d01d5ec9d884ed60ba5809c47392f2886cc148 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 12 Jun 2024 16:56:22 +0800
Subject: [PATCH 4/4] Do not create a "default block" from the zero address

---
 src/util/heap/blockpageresource.rs | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/util/heap/blockpageresource.rs b/src/util/heap/blockpageresource.rs
index 95a4926c04..274dc2e757 100644
--- a/src/util/heap/blockpageresource.rs
+++ b/src/util/heap/blockpageresource.rs
@@ -8,10 +8,12 @@ use crate::util::heap::pageresource::CommonPageResource;
 use crate::util::heap::space_descriptor::SpaceDescriptor;
 use crate::util::linear_scan::Region;
 use crate::util::opaque_pointer::*;
+use crate::util::rust_util::zeroed_alloc::new_zeroed_vec;
 use crate::vm::*;
 
 use atomic::Ordering;
 use spin::RwLock;
 use std::cell::UnsafeCell;
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicUsize;
 use std::sync::Mutex;
@@ -179,17 +181,29 @@ impl<VM: VMBinding, B: Region> BlockPageResource<VM, B> {
 
 /// A block list that supports fast lock-free push/pop operations
 struct BlockQueue<B: Region> {
+    /// The number of elements in the queue.
     cursor: AtomicUsize,
-    data: UnsafeCell<Vec<B>>,
+    /// The underlying data storage.
+    ///
+    /// - `UnsafeCell<T>`: It may be accessed by multiple threads.
+    /// - `Box<[T]>`: It holds an array allocated on the heap. It cannot be resized, but can be
+    ///   replaced with another array as a whole.
+    /// - `MaybeUninit<T>`: It may contain uninitialized elements.
+    ///
+    /// The implementation of `BlockQueue` must ensure there is no data race, and it never reads
+    /// uninitialized elements.
+    data: UnsafeCell<Box<[MaybeUninit<B>]>>,
 }
 
 impl<B: Region> BlockQueue<B> {
     /// Create an array
     fn new() -> Self {
-        let default_block = B::from_aligned_address(Address::ZERO);
+        let zeroed_vec = unsafe { new_zeroed_vec(Self::CAPACITY) };
+        let boxed_slice = zeroed_vec.into_boxed_slice();
+        let data = UnsafeCell::new(boxed_slice);
         Self {
             cursor: AtomicUsize::new(0),
-            data: UnsafeCell::new(vec![default_block; Self::CAPACITY]),
+            data,
         }
     }
 }
@@ -199,14 +213,14 @@ impl<B: Region> BlockQueue<B> {
     /// Get an entry
     fn get_entry(&self, i: usize) -> B {
-        unsafe { (*self.data.get())[i] }
+        unsafe { (*self.data.get())[i].assume_init() }
     }
 
     /// Set an entry.
     ///
     /// It's unsafe unless the array is accessed by only one thread (i.e. used as a thread-local array).
     unsafe fn set_entry(&self, i: usize, block: B) {
-        (*self.data.get())[i] = block
+        (*self.data.get())[i].write(block);
     }
 
     /// Non-atomically push an element.
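
The central idea of patches 1 and 2 is to amortize the global free-list lock: during the `Release` stage each GC worker buffers the blocks it frees locally, and the buffers are merged into the shared pool once per worker when `flush_all` runs at the end of GC, instead of taking the lock for every block. The following is a minimal, self-contained sketch of that pattern. The names `GlobalPool`, `WorkerQueue` and the `Block(usize)` newtype are invented for illustration and are not mmtk-core APIs; the real `BlockPageResource` uses lock-free per-worker arrays (`BlockQueue`) rather than a `Vec` flushed under a mutex.

```rust
use std::sync::Mutex;

/// A block is just its start address in this sketch; the real type carries more metadata.
#[derive(Clone, Copy)]
struct Block(usize);

/// The shared pool of free blocks, guarded by a single mutex (the contended resource).
#[derive(Default)]
struct GlobalPool {
    free: Mutex<Vec<Block>>,
}

/// A per-worker buffer of blocks released during sweeping.
struct WorkerQueue {
    local: Vec<Block>,
}

impl WorkerQueue {
    fn new() -> Self {
        Self { local: Vec::new() }
    }

    /// Record a released block without touching the shared pool.
    fn release(&mut self, b: Block) {
        self.local.push(b);
    }

    /// Merge everything into the global pool with one lock acquisition.
    fn flush(&mut self, pool: &GlobalPool) {
        if self.local.is_empty() {
            return;
        }
        pool.free.lock().unwrap().append(&mut self.local);
    }
}

fn main() {
    let pool = GlobalPool::default();
    std::thread::scope(|s| {
        for w in 0..4usize {
            let pool = &pool;
            s.spawn(move || {
                let mut queue = WorkerQueue::new();
                // Pretend this worker swept 1000 blocks during the Release stage.
                for i in 0..1000usize {
                    queue.release(Block(w * 1_000_000 + i * 0x1000));
                }
                // One flush per worker per GC instead of one lock per block.
                queue.flush(pool);
            });
        }
    });
    let free = pool.free.lock().unwrap();
    assert_eq!(free.len(), 4000);
    println!("{} blocks returned, e.g. one at {:#x}", free.len(), free[0].0);
}
```

With four workers releasing 1000 blocks each, the mutex is taken four times instead of 4000, which is the same trade-off the patch makes against `FreeListPageResource::release_pages`.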
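Patch 4 stops manufacturing a "default block" at address zero just to fill the queue's backing array; with `MaybeUninit`, slots past the cursor are simply left uninitialized and are never read. The sketch below illustrates that idea with a hypothetical `FixedBuf` type; it is not the actual `BlockQueue`, which additionally uses an atomic cursor and `UnsafeCell` for lock-free access.

```rust
use std::mem::MaybeUninit;

/// A toy fixed-capacity buffer: slots beyond `len` are uninitialized,
/// so no dummy "default" element is ever constructed.
struct FixedBuf<T, const N: usize> {
    len: usize,
    data: [MaybeUninit<T>; N],
}

impl<T: Copy, const N: usize> FixedBuf<T, N> {
    fn new() -> Self {
        Self {
            len: 0,
            // An array of `MaybeUninit` is allowed to start out uninitialized.
            data: unsafe { MaybeUninit::uninit().assume_init() },
        }
    }

    /// Append a value; returns false when the buffer is full.
    fn push(&mut self, v: T) -> bool {
        if self.len == N {
            return false;
        }
        self.data[self.len].write(v);
        self.len += 1;
        true
    }

    /// Read back an element that has been written; untouched slots are never read.
    fn get(&self, i: usize) -> Option<T> {
        (i < self.len).then(|| unsafe { self.data[i].assume_init() })
    }
}

fn main() {
    let mut buf: FixedBuf<usize, 8> = FixedBuf::new();
    assert!(buf.push(0x1000));
    assert!(buf.push(0x2000));
    assert_eq!(buf.get(1), Some(0x2000));
    assert_eq!(buf.get(5), None);
}
```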