Collect live bytes during GC #768

Merged · 8 commits · Aug 8, 2023

Changes from 2 commits
3 changes: 3 additions & 0 deletions Cargo.toml
@@ -125,6 +125,9 @@ work_packet_stats = []
# Count the malloc'd memory into the heap size
malloc_counted_size = []

# Count the size of all live objects in GC
count_live_bytes_in_gc = []

# Do not modify the following line - ci-common.sh matches it
# -- Mutally exclusive features --
# Only one feature from each group can be provided. Otherwise build will fail.
20 changes: 18 additions & 2 deletions src/memory_manager.rs
@@ -521,22 +521,38 @@ pub fn process_bulk(builder: &mut MMTKBuilder, options: &str) -> bool {
builder.set_options_bulk_by_str(options)
}

/// Return used memory in bytes.
/// Return used memory in bytes. MMTk accounts for memory in pages, so this method always returns a value at
/// page granularity.
///
/// Arguments:
/// * `mmtk`: A reference to an MMTk instance.
pub fn used_bytes<VM: VMBinding>(mmtk: &MMTK<VM>) -> usize {
mmtk.plan.get_used_pages() << LOG_BYTES_IN_PAGE
}

/// Return free memory in bytes.
/// Return free memory in bytes. MMTk accounts for memory in pages, so this method always returns a value at
/// page granularity.
///
/// Arguments:
/// * `mmtk`: A reference to an MMTk instance.
pub fn free_bytes<VM: VMBinding>(mmtk: &MMTK<VM>) -> usize {
mmtk.plan.get_free_pages() << LOG_BYTES_IN_PAGE
}

/// Return the size in bytes of all live objects in the last GC. MMTk usually accounts for memory in pages.
/// This method is special: during a GC, we measure the size of every live object and sum up the total bytes.
/// We provide it so users can compare the result with `used_bytes` (which does page accounting) and see how
/// fragmented the heap is.
/// The value returned by this method is only updated when tracing finishes in a GC. A recommended time to
/// call this method is at the end of a GC (e.g. when the runtime is about to resume threads).
#[cfg(feature = "count_live_bytes_in_gc")]
pub fn live_bytes_in_last_gc<VM: VMBinding>(mmtk: &MMTK<VM>) -> usize {
mmtk.plan
.base()
.live_bytes_in_last_gc
.load(Ordering::SeqCst)
}
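A minimal usage sketch (hypothetical binding-side code; `log_fragmentation` is not part of MMTk) comparing this counter with `used_bytes` to estimate fragmentation, assuming the `count_live_bytes_in_gc` feature is enabled:

use mmtk::memory_manager;
use mmtk::vm::VMBinding;
use mmtk::MMTK;

// Hypothetical helper: call at the end of a GC, before resuming mutators.
fn log_fragmentation<VM: VMBinding>(mmtk: &MMTK<VM>) {
    let live = memory_manager::live_bytes_in_last_gc(mmtk);
    let used = memory_manager::used_bytes(mmtk);
    // Used pages that are not occupied by live objects suggest fragmentation.
    let frag = 100.0 * (1.0 - live as f64 / used as f64);
    println!("live = {} bytes, used = {} bytes, ~{:.1}% fragmentation", live, used, frag);
}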

/// Return the starting address of the heap. *Note that currently MMTk uses
/// a fixed address range as heap.*
pub fn starting_heap_address() -> Address {
5 changes: 5 additions & 0 deletions src/plan/global.rs
@@ -398,6 +398,9 @@ pub struct BasePlan<VM: VMBinding> {
/// A counter that keeps track of the number of bytes allocated by malloc
#[cfg(feature = "malloc_counted_size")]
malloc_bytes: AtomicUsize,
/// This stores the size in bytes of all live objects in the last GC. This counter is only updated in the GC release phase.
#[cfg(feature = "count_live_bytes_in_gc")]
pub live_bytes_in_last_gc: AtomicUsize,
/// Wrapper around analysis counters
#[cfg(feature = "analysis")]
pub analysis_manager: AnalysisManager<VM>,
@@ -552,6 +555,8 @@ impl<VM: VMBinding> BasePlan<VM> {
allocation_bytes: AtomicUsize::new(0),
#[cfg(feature = "malloc_counted_size")]
malloc_bytes: AtomicUsize::new(0),
#[cfg(feature = "count_live_bytes_in_gc")]
live_bytes_in_last_gc: AtomicUsize::new(0),
#[cfg(feature = "analysis")]
analysis_manager,
}
6 changes: 6 additions & 0 deletions src/plan/markcompact/gc_work.rs
@@ -43,6 +43,12 @@ impl<VM: VMBinding> GCWork<VM> for UpdateReferences<VM> {
#[cfg(feature = "extreme_assertions")]
mmtk.edge_logger.reset();

// We do two passes of transitive closure, so we clear the live bytes accumulated during the first pass.
#[cfg(feature = "count_live_bytes_in_gc")]
mmtk.scheduler
.worker_group
.get_and_clear_worker_live_bytes();

// TODO investigate why the following will create duplicate edges
// scheduler.work_buckets[WorkBucketStage::RefForwarding]
// .add(ScanStackRoots::<ForwardingProcessEdges<VM>>::new());
2 changes: 1 addition & 1 deletion src/plan/tracing.rs
@@ -74,7 +74,7 @@ impl ObjectQueue for VectorQueue<ObjectReference> {
/// A transitive closure visitor to collect all the edges of an object.
pub struct ObjectsClosure<'a, E: ProcessEdgesWork> {
buffer: VectorQueue<EdgeOf<E>>,
worker: &'a mut GCWorker<E::VM>,
pub(crate) worker: &'a mut GCWorker<E::VM>,
Collaborator: This no longer needs to be pub(crate).

Member Author: I think we still need this. When we call worker.shared.increase_live_bytes() in ScanObjectsWork::do_work_common(), we have already created an ObjectsClosure, which takes &mut GCWorker. We cannot use the worker directly; we have to reach it through ObjectsClosure.

Collaborator: Oh, sorry. I didn't notice that.
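A minimal sketch (hypothetical, simplified types) of the borrow constraint described in this thread: once the closure takes the `&mut GCWorker`, the caller can only reach the worker through the closure's field.

struct GCWorker {
    live_bytes: usize,
}

struct ObjectsClosure<'a> {
    worker: &'a mut GCWorker,
}

fn do_work_common(worker: &mut GCWorker) {
    let closure = ObjectsClosure { worker };
    // worker.live_bytes += 8;      // ERROR: `worker` was moved into `closure`
    closure.worker.live_bytes += 8; // OK: reach the worker through the closure
}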

}

impl<'a, E: ProcessEdgesWork> ObjectsClosure<'a, E> {
59 changes: 57 additions & 2 deletions src/scheduler/gc_work.rs
@@ -124,6 +124,18 @@ impl<C: GCWorkContext + 'static> GCWork<C::VM> for Release<C> {
let result = w.designated_work.push(Box::new(ReleaseCollector));
debug_assert!(result.is_ok());
}

#[cfg(feature = "count_live_bytes_in_gc")]
{
let live_bytes = mmtk
.scheduler
.worker_group
.get_and_clear_worker_live_bytes();
self.plan
.base()
.live_bytes_in_last_gc
.store(live_bytes, std::sync::atomic::Ordering::SeqCst);
}
}
}

@@ -232,6 +244,28 @@ impl<VM: VMBinding> GCWork<VM> for EndOfGC {
self.elapsed.as_millis()
);

#[cfg(feature = "count_live_bytes_in_gc")]
{
let live_bytes = mmtk
.plan
.base()
.live_bytes_in_last_gc
.load(std::sync::atomic::Ordering::SeqCst);
let used_bytes =
mmtk.plan.get_used_pages() << crate::util::constants::LOG_BYTES_IN_PAGE;
debug_assert!(
live_bytes <= used_bytes,
"Live bytes of all live objects ({} bytes) is larger than used pages ({} bytes), something is wrong.",
live_bytes, used_bytes
);
info!(
"Live objects = {} bytes ({:04.1}% of {} used pages)",
live_bytes,
live_bytes as f64 * 100.0 / used_bytes as f64,
mmtk.plan.get_used_pages()
);
}

// We assume this is the only running work packet that accesses plan at the point of execution
#[allow(clippy::cast_ref_to_mut)]
let plan_mut: &mut dyn Plan<VM = VM> = unsafe { &mut *(&*mmtk.plan as *const _ as *mut _) };
@@ -323,6 +357,16 @@ impl<E: ProcessEdgesWork> ObjectTracerContext<E::VM> for ProcessEdgesWorkTracerContext<E>
fn with_tracer<R, F>(&self, worker: &mut GCWorker<E::VM>, func: F) -> R
where
F: FnOnce(&mut Self::TracerType) -> R,
{
self.with_tracer_and_worker(worker, |tracer, _| func(tracer))
}
}

impl<E: ProcessEdgesWork> ProcessEdgesWorkTracerContext<E> {
// This also exposes the worker to the callback function. This is not a public method.
fn with_tracer_and_worker<R, F>(&self, worker: &mut GCWorker<E::VM>, func: F) -> R
where
F: FnOnce(&mut ProcessEdgesWorkTracer<E>, &mut GCWorker<E::VM>) -> R,
Collaborator: Strictly speaking, it is unsafe to give the user access to both the tracer and the worker. The tracer is a closure that calls trace_object underneath, and the worker is also one of the arguments of trace_object. I planned to forbid this by adding a lifetime annotation, but I couldn't do it without the generic associated types (GAT) feature before Rust 1.65. See:

/// FIXME: The current code works because of the unsafe method `ProcessEdgesWork::set_worker`.
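A hypothetical sketch (simplified, not MMTk's actual API) of the lifetime-based restriction mentioned above: with a generic associated type, the tracer's type can carry the lifetime of the worker borrow, so the callback cannot also be handed the worker.

struct GCWorker;

trait ObjectTracerContext {
    // Generic associated type (stable since Rust 1.65): the tracer type
    // borrows the worker for 'w.
    type TracerType<'w>;

    // The worker is exclusively borrowed for 'w by the tracer, so the
    // callback cannot receive a second `&mut GCWorker`.
    fn with_tracer<'w, R, F>(&self, worker: &'w mut GCWorker, func: F) -> R
    where
        F: FnOnce(&mut Self::TracerType<'w>) -> R;
}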

Collaborator: Another problem with this is that VM bindings can use ObjectTracerContext to get temporary access to trace_object during Scanning::process_weak_refs and Scanning::forward_weak_refs. Adding with_tracer_and_worker will allow ScanObjects to record object sizes, but not VM-specific weak reference processing.

Member Author: with_tracer takes a mutable reference to the worker, so I cannot use the worker any more in ScanObjects::do_work_common. We need a way to get the worker back from the tracer. Another option is to expose some methods from ObjectTracer so we can get the worker or directly increase the counter.

Member Author:
> Another problem with this is that VM bindings can use ObjectTracerContext to get temporary access to trace_object during Scanning::process_weak_refs and Scanning::forward_weak_refs. Adding with_tracer_and_worker will allow ScanObjects to record object sizes, but not VM-specific weak reference processing.

The remaining question is how to count live bytes for weak references. As weak reference processing happens entirely on the binding side, we may have to expose a method and let the binding call it when it scans weak refs.

Collaborator:
> The remaining question is how to count live bytes for weak references. As weak reference processing happens entirely on the binding side, we may have to expose a method and let the binding call it when it scans weak refs.

If we count the object size in ScanObjectsWork, it will not be a problem. The binding keeps objects alive during the weak reference processing stage using trace_object (via ObjectTracer::trace_object, which is implemented with ProcessEdgesWork::trace_object underneath and eventually reaches the space-specific trace_object). Any object visited by trace_object will eventually be visited by ScanObjectsWork.

{
let mmtk = worker.mmtk;

@@ -339,7 +383,7 @@
};

// The caller can use the tracer here.
let result = func(&mut tracer);
let result = func(&mut tracer, worker);

// Flush the queued nodes.
tracer.flush_if_not_empty();
@@ -826,6 +870,12 @@ pub trait ScanObjectsWork<VM: VMBinding>: GCWork<VM> + Sized {
// If an object supports edge-enqueuing, we enqueue its edges.
<VM as VMBinding>::VMScanning::scan_object(tls, object, &mut closure);
self.post_scan_object(object);

#[cfg(feature = "count_live_bytes_in_gc")]
closure
.worker
.shared
.increase_live_bytes(VM::VMObjectModel::get_current_size(object));
} else {
// If an object does not support edge-enqueuing, we have to use
// `Scanning::scan_object_and_trace_edges` and offload the job of updating the
@@ -845,7 +895,7 @@ pub trait ScanObjectsWork<VM: VMBinding>: GCWork<VM> + Sized {
phantom_data: PhantomData,
};

object_tracer_context.with_tracer(worker, |object_tracer| {
object_tracer_context.with_tracer_and_worker(worker, |object_tracer, _worker| {
// Scan objects and trace their edges at the same time.
for object in scan_later.iter().copied() {
trace!("Scan object (node) {}", object);
Expand All @@ -855,6 +905,11 @@ pub trait ScanObjectsWork<VM: VMBinding>: GCWork<VM> + Sized {
object_tracer,
);
self.post_scan_object(object);

#[cfg(feature = "count_live_bytes_in_gc")]
_worker
.shared
.increase_live_bytes(VM::VMObjectModel::get_current_size(object));
}
});
}
25 changes: 25 additions & 0 deletions src/scheduler/worker.rs
@@ -31,6 +31,11 @@ pub fn current_worker_ordinal() -> Option<ThreadId> {
pub struct GCWorkerShared<VM: VMBinding> {
/// Worker-local statistics data.
stat: AtomicRefCell<WorkerLocalStat<VM>>,
/// Accumulated bytes of live objects in this GC. Each worker increases this
/// counter as it scans objects. At the end of a GC, we collect the value from
/// each worker and reset the counter.
#[cfg(feature = "count_live_bytes_in_gc")]
live_bytes: AtomicUsize,
Member: Do we need atomic for this? It's worker-local right?

Member Author (@qinsoon, Mar 8, 2023):
> Do we need atomic for this? It's worker-local right?

It is in the shared part of a worker, so we cannot get a mutable reference to it. It needs to be in the shared part because we iterate through all workers and sum the counters up.

Member: Ah OK. I was just thinking that if we can make it cheap enough by not using atomics for worker-local stats, we could enable this feature by default.

Collaborator (@k-sareen, Mar 8, 2023): Why not make it worker-local and have each worker report its live bytes after its transitive closure? Then we don't have to explicitly iterate through all the workers; we just collate the numbers we get from the transitive closure.

Collaborator (@wks, Mar 8, 2023): It could be any stage after the transitive closure stages (including weak reference processing stages).

Collaborator: Maybe it's easier to defer the "live byte count" to each policy as opposed to each worker? But then it stops being worker-local, so it would need synchronization.

Member Author:
> Maybe it's easier to defer the "live byte count" to each policy as opposed to each worker? But then it stops being worker-local, so it would need synchronization.

Yeah. It needs at least the same level of synchronization as the current code, and the counting code would be scattered across the policies.

Member Author: We could use AtomicUsize::as_mut_ptr and do a non-atomic add with unsafe code when we do the counting. But I still question whether we want this enabled by default.
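A minimal sketch of that non-atomic alternative (hypothetical helper; `as_ptr` is the stable accessor for the atomic's underlying value), which is only sound while a single thread owns the counter:

use std::sync::atomic::AtomicUsize;

// Hypothetical helper: a plain, non-atomic increment through the atomic's
// underlying pointer.
fn increase_live_bytes_unsync(counter: &AtomicUsize, bytes: usize) {
    // SAFETY: sound only if no other thread accesses `counter` concurrently,
    // e.g. if each GC worker only ever touches its own counter here.
    unsafe {
        *counter.as_ptr() += bytes;
    }
}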

Collaborator: I personally don't have a strong opinion. Keeping track of fragmentation is definitely useful, though. If it's possible to do it cheaply for every GC, we may as well do it. If it's not possible, then I don't think we should spend more time on it.

Collaborator: @qinsoon Sorry about the typo. I meant ReleaseCollector. Yes, it will be a problem for MarkCompact. If we make the counter a private field of a Worker, it will require another rendezvous (designated work) between the marking phase and the forwarding phase. Given this situation, I don't mind if we use AtomicUsize for now.

/// A queue of GCWork that can only be processed by the owned thread.
///
/// Note: Currently, designated work cannot be added from the GC controller thread, or
@@ -45,10 +50,22 @@ impl<VM: VMBinding> GCWorkerShared<VM> {
pub fn new(stealer: Option<Stealer<Box<dyn GCWork<VM>>>>) -> Self {
Self {
stat: Default::default(),
#[cfg(feature = "count_live_bytes_in_gc")]
live_bytes: AtomicUsize::new(0),
designated_work: ArrayQueue::new(16),
stealer,
}
}

#[cfg(feature = "count_live_bytes_in_gc")]
pub(crate) fn increase_live_bytes(&self, bytes: usize) {
self.live_bytes.fetch_add(bytes, Ordering::Relaxed);
}

#[cfg(feature = "count_live_bytes_in_gc")]
pub(crate) fn get_and_clear_live_bytes(&self) -> usize {
self.live_bytes.swap(0, Ordering::SeqCst)
}
}

/// A GC worker. This part is privately owned by a worker thread.
@@ -285,6 +302,14 @@ impl<VM: VMBinding> WorkerGroup<VM> {
.iter()
.any(|w| !w.designated_work.is_empty())
}

#[cfg(feature = "count_live_bytes_in_gc")]
pub fn get_and_clear_worker_live_bytes(&self) -> usize {
self.workers_shared
.iter()
.map(|w| w.get_and_clear_live_bytes())
.sum()
}
}

/// This ensures the worker always decrements the parked worker count on all control flow paths.