From 47ee126f91f25b911498b3d49c35f672f5e935f9 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 20 Jul 2023 14:36:19 +0800 Subject: [PATCH] Counting VM-allocated pages into heap size. (#866) Some VMs allocate memory outside the MMTk heap, using `malloc` or other allocation methods. This memory can usually be reclaimed by the finalizers of dead objects in the MMTk heap. This commit allows the VM to report the amount of such off-heap memory so that MMTk can trigger GC more promptly to reclaim such memory. --- Cargo.toml | 3 +++ src/plan/global.rs | 40 ++++++++++++++++++++++++++++++---------- src/vm/collection.rs | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index db4da419d8..dc47df85b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,9 @@ delegate = "0.9.0" downcast-rs = "1.1.1" enum-map = "2.4.2" env_logger = "0.10.0" +# We do not use this crate, but env_logger uses it. env_logger uses is_terminal 0.4.0. However, since 0.4.8, is_terminal requires Rust 1.63. +# So we fix on 0.4.7 here. Once we bump our MSRV, we can remove this. +is-terminal = "=0.4.7" itertools = "0.10.5" jemalloc-sys = { version = "0.5.3", features = ["disable_initial_exec_tls"], optional = true } lazy_static = "1.1" diff --git a/src/plan/global.rs b/src/plan/global.rs index 0a1dbb5f45..1340fffac2 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -25,7 +25,7 @@ use crate::util::metadata::side_metadata::SideMetadataSpec; use crate::util::options::Options; use crate::util::options::PlanSelector; use crate::util::statistics::stats::Stats; -use crate::util::ObjectReference; +use crate::util::{conversions, ObjectReference}; use crate::util::{VMMutatorThread, VMWorkerThread}; use crate::vm::*; use downcast_rs::Downcast; @@ -252,10 +252,28 @@ pub trait Plan: 'static + Sync + Downcast { // work fine for non-copying plans. For copying plans, the plan should override any of these methods // if necessary. 
- /// Get the number of pages that are reserved, including used pages and pages that will - /// be used (e.g. for copying). + /// Get the number of pages that are reserved, including pages used by MMTk spaces, pages that + /// will be used (e.g. for copying), and live pages allocated outside MMTk spaces as reported + /// by the VM binding. fn get_reserved_pages(&self) -> usize { - self.get_used_pages() + self.get_collection_reserved_pages() + let used_pages = self.get_used_pages(); + let collection_reserve = self.get_collection_reserved_pages(); + let vm_live_bytes = <Self::VM as VMBinding>::VMCollection::vm_live_bytes(); + // Note that `vm_live_bytes` may not be the exact number of bytes in whole pages. The VM + // binding is allowed to return an approximate value if it is expensive or impossible to + // compute the exact number of pages occupied. + let vm_live_pages = conversions::bytes_to_pages_up(vm_live_bytes); + let total = used_pages + collection_reserve + vm_live_pages; + + trace!( + "Reserved pages = {}, used pages: {}, collection reserve: {}, VM live pages: {}", + total, + used_pages, + collection_reserve, + vm_live_pages, + ); + + total } /// Get the total number of pages for the heap. @@ -266,6 +284,9 @@ pub trait Plan: 'static + Sync + Downcast { /// Get the number of pages that are still available for use. The available pages /// should always be positive or 0. fn get_available_pages(&self) -> usize { + let reserved_pages = self.get_reserved_pages(); + let total_pages = self.get_total_pages(); + // It is possible that the reserved pages is larger than the total pages so we are doing // a saturating subtraction to make sure we return a non-negative number. // For example, @@ -274,15 +295,14 @@ pub trait Plan: 'static + Sync + Downcast { // the reserved pages is larger than total pages after the copying GC (the reserved pages after a GC // may be larger than the reserved pages before a GC, as we may end up using more memory for thread local // buffers for copy allocators). 
+ let available_pages = total_pages.saturating_sub(reserved_pages); trace!( "Total pages = {}, reserved pages = {}, available pages = {}", - self.get_total_pages(), - self.get_reserved_pages(), - self.get_reserved_pages() - .saturating_sub(self.get_reserved_pages()) + total_pages, + reserved_pages, + available_pages, ); - self.get_total_pages() - .saturating_sub(self.get_reserved_pages()) + available_pages } /// Get the number of pages that are reserved for collection. By default, we return 0. diff --git a/src/vm/collection.rs b/src/vm/collection.rs index c552842945..9caf05ff6f 100644 --- a/src/vm/collection.rs +++ b/src/vm/collection.rs @@ -111,4 +111,38 @@ pub trait Collection { /// Arguments: /// * `tls_worker`: The thread pointer for the worker thread performing this call. fn post_forwarding(_tls: VMWorkerThread) {} + + /// Return the amount of memory (in bytes) which the VM allocated outside the MMTk heap but + /// wants to include into the current MMTk heap size. MMTk core will consider the reported + /// memory as part of MMTk heap for the purpose of heap size accounting. + /// + /// This amount should include memory that is kept alive by heap objects and can be released by + /// executing finalizers (or other language-specific cleaning-up routines) that are executed + /// when the heap objects are dead. For example, if a language implementation allocates array + /// headers in the MMTk heap, but allocates their underlying buffers that hold the actual + /// elements using `malloc`, then those buffers should be included in this amount. When the GC + /// finds such an array dead, its finalizer shall `free` the buffer and reduce this amount. + /// + /// If possible, the VM should account off-heap memory in pages. That is, count the number of + /// pages occupied by off-heap objects, and report the number of bytes of those whole pages + /// instead of individual objects. 
Because the underlying operating system manages memory at + /// page granularity, the occupied pages (instead of individual objects) determine the memory + /// footprint of a process, and how much memory MMTk spaces can obtain from the OS. + /// + /// However, if the VM is incapable of accounting off-heap memory in pages (for example, if the + /// VM uses `malloc` and the implementation of `malloc` is opaque to the VM), the VM binding + /// can simply return the total number of bytes of those off-heap objects as an approximation. + /// + /// # Performance note + /// + /// This function will be called when MMTk polls for GC. It happens every time the MMTk + /// allocators have allocated a certain amount of memory, usually one or a few blocks. Because + /// this function is called very frequently, its implementation must be efficient. If it is + /// too expensive to compute the exact amount, an approximate value should be sufficient for + /// MMTk to trigger GC promptly in order to release off-heap memory, and keep the memory + /// footprint under control. + fn vm_live_bytes() -> usize { + // By default, MMTk assumes the amount of memory the VM allocates off-heap is negligible. + 0 + } }