Skip to content

Commit

Permalink
Tune GNU malloc (#2299)
Browse files Browse the repository at this point in the history
## Issue Addressed

NA

## Proposed Changes

Modify the configuration of [GNU malloc](https://www.gnu.org/software/libc/manual/html_node/The-GNU-Allocator.html) to reduce memory footprint.

- Set `M_ARENA_MAX` to the number of CPUs (the `ArenaMaxSetting::NumCpus` setting in `glibc.rs`).
    - This reduces memory fragmentation at the cost of contention between threads.
- Set `M_MMAP_THRESHOLD` to 2 MiB.
    - This means that any allocation >= 2 MiB is allocated via an anonymous mmap, instead of on the heap/arena. This reduces memory fragmentation since we don't need to keep growing the heap to find big contiguous slabs of free memory.
- ~~Run `malloc_trim` every 60 seconds.~~
    - ~~This shaves unused memory from the top of the heap, preventing the heap from constantly growing.~~
    - Removed, see: #2299 (comment)

*Note: this only provides memory savings on the Linux (glibc) platform.*
    
## Additional Info

I'm going to close #2288 in favor of this for the following reasons:

- I've managed to get the memory footprint *smaller* here than with jemalloc.
- This PR seems to be less of a dramatic change than bringing in the jemalloc dep.
- The changes in this PR are strictly runtime changes, so we can create CLI flags which disable them completely. Since this change is wide-reaching and complex, it's nice to have an easy "escape hatch" if there are undesired consequences.

## TODO

- [x] Allow configuration via CLI flags
- [x] Test on Mac
- [x] Test on RasPi.
- [x] Determine if GNU malloc is present?
    - I'm not quite sure how to detect glibc. This issue suggests we can't really: rust-lang/rust#33244
- [x] Make a clear argument regarding the effect of this on CPU utilization.
- [x] Test with higher `M_ARENA_MAX` values.
- [x] Test with longer trim intervals
- [x] Add some stats about memory savings
- [x] Remove `malloc_trim` calls & code
  • Loading branch information
paulhauner committed May 28, 2021
1 parent fdaeec6 commit 456b313
Show file tree
Hide file tree
Showing 16 changed files with 350 additions and 1 deletion.
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ members = [
"common/lockfile",
"common/logging",
"common/lru_cache",
"common/malloc_utils",
"common/remote_signer_consumer",
"common/sensitive_url",
"common/slot_clock",
Expand Down
1 change: 1 addition & 0 deletions beacon_node/http_metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ lazy_static = "1.4.0"
eth2 = { path = "../../common/eth2" }
lighthouse_version = { path = "../../common/lighthouse_version" }
warp_utils = { path = "../../common/warp_utils" }
malloc_utils = { path = "../../common/malloc_utils" }

[dev-dependencies]
tokio = { version = "1.1.0", features = ["sync"] }
Expand Down
2 changes: 2 additions & 0 deletions beacon_node/http_metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub struct Config {
pub listen_addr: Ipv4Addr,
pub listen_port: u16,
pub allow_origin: Option<String>,
pub allocator_metrics_enabled: bool,
}

impl Default for Config {
Expand All @@ -58,6 +59,7 @@ impl Default for Config {
listen_addr: Ipv4Addr::new(127, 0, 0, 1),
listen_port: 5054,
allow_origin: None,
allocator_metrics_enabled: true,
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions beacon_node/http_metrics/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::Context;
use beacon_chain::BeaconChainTypes;
use lighthouse_metrics::{Encoder, TextEncoder};
use malloc_utils::scrape_allocator_metrics;

pub use lighthouse_metrics::*;

Expand Down Expand Up @@ -41,6 +42,12 @@ pub fn gather_prometheus_metrics<T: BeaconChainTypes>(

warp_utils::metrics::scrape_health_metrics();

// Only scrape these metrics when they are explicitly enabled: for users who aren't using glibc,
// this function may cause panics.
if ctx.config.allocator_metrics_enabled {
scrape_allocator_metrics();
}

encoder
.encode(&lighthouse_metrics::gather(), &mut buffer)
.unwrap();
Expand Down
1 change: 1 addition & 0 deletions beacon_node/http_metrics/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ async fn returns_200_ok() {
listen_addr: Ipv4Addr::new(127, 0, 0, 1),
listen_port: 0,
allow_origin: None,
allocator_metrics_enabled: true,
},
chain: None,
db_path: None,
Expand Down
7 changes: 6 additions & 1 deletion beacon_node/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use clap::ArgMatches;
use clap_utils::BAD_TESTNET_DIR_MESSAGE;
use clap_utils::{flags::DISABLE_MALLOC_TUNING_FLAG, BAD_TESTNET_DIR_MESSAGE};
use client::{ClientConfig, ClientGenesis};
use directory::{DEFAULT_BEACON_NODE_DIR, DEFAULT_NETWORK_DIR, DEFAULT_ROOT_DIR};
use eth2_libp2p::{multiaddr::Protocol, Enr, Multiaddr, NetworkConfig, PeerIdSerialized};
Expand Down Expand Up @@ -156,6 +156,11 @@ pub fn get_config<E: EthSpec>(
);
}

// Do not scrape for malloc metrics if we've disabled tuning malloc as it may cause panics.
if cli_args.is_present(DISABLE_MALLOC_TUNING_FLAG) {
client_config.http_metrics.allocator_metrics_enabled = false;
}

/*
* Eth1
*/
Expand Down
3 changes: 3 additions & 0 deletions common/clap_utils/src/flags.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! CLI flags used across the Lighthouse code base can be located here.

/// Name of the CLI flag used to disable malloc tuning.
///
/// The beacon node config also turns off allocator metric scraping when this flag is present.
pub const DISABLE_MALLOC_TUNING_FLAG: &str = "disable-malloc-tuning";
2 changes: 2 additions & 0 deletions common/clap_utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use ssz::Decode;
use std::path::PathBuf;
use std::str::FromStr;

pub mod flags;

pub const BAD_TESTNET_DIR_MESSAGE: &str = "The hard-coded testnet directory was invalid. \
This happens when Lighthouse is migrating between spec versions \
or when there is no default public network to connect to. \
Expand Down
14 changes: 14 additions & 0 deletions common/malloc_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "malloc_utils"
version = "0.1.0"
authors = ["Paul Hauner <paul@paulhauner.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lighthouse_metrics = { path = "../lighthouse_metrics" }
lazy_static = "1.4.0"
libc = "0.2.79"
parking_lot = "0.11.0"
num_cpus = "1.13.0"
210 changes: 210 additions & 0 deletions common/malloc_utils/src/glibc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
//! Contains functions for tuning and controlling "The GNU Allocator", included in the `glibc`
//! library.
//!
//! https://www.gnu.org/software/libc/manual/html_node/The-GNU-Allocator.html
//!
//! These functions are generally only suitable for Linux systems.
use lazy_static::lazy_static;
use lighthouse_metrics::*;
use parking_lot::Mutex;
use std::env;
use std::os::raw::c_int;
use std::result::Result;

/// The value to be provided to `malloc_mmap_threshold` (2 * 1024 * 1024, i.e. 2 MiB when
/// interpreted as a byte count).
///
/// The value is chosen to sit just above the size of a single chunk of the validators tree hash
/// cache, so that those chunks will *not* be allocated via `mmap`.
///
/// The size of a single chunk is:
///
/// NODES_PER_VALIDATOR * VALIDATORS_PER_ARENA * 32 = 15 * 4096 * 32 = 1.875 MiB
const OPTIMAL_MMAP_THRESHOLD: c_int = 2 * 1_024 * 1_024;

/// The maximum number of arenas allowed to be created by malloc.
///
/// This is a policy rather than a number: `configure_glibc_malloc` resolves it to a concrete
/// value (or to "leave untouched") at runtime.
///
/// See `ArenaMaxSetting` docs for details.
const OPTIMAL_ARENA_MAX: ArenaMaxSetting = ArenaMaxSetting::NumCpus;

// Constants used to configure malloc internals. Each one is passed as the `param` argument of
// `mallopt` to select which setting is being changed.
//
// Source:
//
// https://github.com/lattera/glibc/blob/895ef79e04a953cac1493863bcae29ad85657ee1/malloc/malloc.h#L115-L123

/// `mallopt` parameter identifier for the mmap threshold (see `malloc_mmap_threshold`).
const M_MMAP_THRESHOLD: c_int = -4;
/// `mallopt` parameter identifier for the maximum arena count (see `malloc_arena_max`).
const M_ARENA_MAX: c_int = -8;

// Environment variables used to configure malloc. When one of these is present,
// `configure_glibc_malloc` leaves the corresponding setting untouched.
//
// Source:
//
// https://man7.org/linux/man-pages/man3/mallopt.3.html

/// Environment variable controlling the maximum arena count.
const ENV_VAR_ARENA_MAX: &str = "MALLOC_ARENA_MAX";
/// Environment variable controlling the mmap threshold.
///
/// NOTE(review): the trailing underscore looks intentional (it should match the name listed on
/// the man page above) — do not "tidy" it away without checking.
const ENV_VAR_MMAP_THRESHOLD: &str = "MALLOC_MMAP_THRESHOLD_";

/// Describes how the malloc arena limit should be chosen by `configure_glibc_malloc`.
// Only the variant assigned to `OPTIMAL_ARENA_MAX` is ever constructed; the remaining variants are
// kept as ready-made alternatives, hence the `dead_code` allowance.
#[allow(dead_code)]
enum ArenaMaxSetting {
    /// Do not set any value for MALLOC_ARENA_MAX, leave it as default.
    DoNotSet,
    /// Set a fixed value.
    Fixed(c_int),
    /// Read the number of CPUs at runtime and use that value.
    NumCpus,
}

// Process-wide lock taken by the `mallopt` and `mallinfo` wrappers in this module so that those
// libc calls are never made in parallel with each other.
lazy_static! {
    pub static ref GLOBAL_LOCK: Mutex<()> = <_>::default();
}

// Metrics for the malloc. For more information, see:
//
// https://man7.org/linux/man-pages/man3/mallinfo.3.html
// One gauge per field of the `mallinfo` struct; each is refreshed by `scrape_mallinfo_metrics`.
// The help strings mirror the field descriptions on the `mallinfo(3)` man page.
lazy_static! {
    pub static ref MALLINFO_ARENA: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_arena",
        "The total amount of memory allocated by means other than mmap(2). \
        This figure includes both in-use blocks and blocks on the free list.",
    );
    pub static ref MALLINFO_ORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_ordblks",
        "The number of ordinary (i.e., non-fastbin) free blocks.",
    );
    pub static ref MALLINFO_SMBLKS: lighthouse_metrics::Result<IntGauge> =
        try_create_int_gauge("mallinfo_smblks", "The number of fastbin free blocks.",);
    pub static ref MALLINFO_HBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_hblks",
        "The number of blocks currently allocated using mmap.",
    );
    pub static ref MALLINFO_HBLKHD: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_hblkhd",
        "The number of bytes in blocks currently allocated using mmap.",
    );
    pub static ref MALLINFO_FSMBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_fsmblks",
        "The total number of bytes in fastbin free blocks.",
    );
    pub static ref MALLINFO_UORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_uordblks",
        "The total number of bytes used by in-use allocations.",
    );
    pub static ref MALLINFO_FORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_fordblks",
        "The total number of bytes in free blocks.",
    );
    // Help text previously ended with a doubled period ("heap.."); fixed to a single one.
    pub static ref MALLINFO_KEEPCOST: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
        "mallinfo_keepcost",
        "The total amount of releasable free space at the top of the heap.",
    );
}

/// Takes a `mallinfo` snapshot of the allocator and copies every field into its matching
/// Prometheus gauge.
pub fn scrape_mallinfo_metrics() {
    // `mallinfo` is documented as thread-unsafe because the snapshot it returns may be internally
    // inconsistent. These values are only exported as metrics, so an occasional inconsistent
    // reading is not a concern.
    //
    // A newer `mallinfo2` function exists, however it was only released in February 2021 and that
    // seems too recent to rely on.
    //
    // NOTE(review): the fields read below are cast from C integers; the man page linked here
    // should be consulted regarding wrap-around on very large heaps — confirm whether that
    // matters for these gauges.
    //
    // Docs:
    //
    // https://man7.org/linux/man-pages/man3/mallinfo.3.html
    let stats = mallinfo();

    set_gauge(&MALLINFO_ARENA, stats.arena as i64);
    set_gauge(&MALLINFO_ORDBLKS, stats.ordblks as i64);
    set_gauge(&MALLINFO_SMBLKS, stats.smblks as i64);
    set_gauge(&MALLINFO_HBLKS, stats.hblks as i64);
    set_gauge(&MALLINFO_HBLKHD, stats.hblkhd as i64);
    set_gauge(&MALLINFO_FSMBLKS, stats.fsmblks as i64);
    set_gauge(&MALLINFO_UORDBLKS, stats.uordblks as i64);
    set_gauge(&MALLINFO_FORDBLKS, stats.fordblks as i64);
    set_gauge(&MALLINFO_KEEPCOST, stats.keepcost as i64);
}

/// Applies every malloc tuning step provided by this module.
///
/// A setting is skipped when its environment variable (`MALLOC_ARENA_MAX` or
/// `MALLOC_MMAP_THRESHOLD_`) is already present, so an explicit choice made through the
/// environment is never overridden.
///
/// ## Errors
///
/// Returns a message containing the `mallopt` return code if either setting could not be applied.
pub fn configure_glibc_malloc() -> Result<(), String> {
    if !env_var_present(ENV_VAR_ARENA_MAX) {
        // Resolve the configured policy into a concrete arena count, if any.
        let desired_arenas = match OPTIMAL_ARENA_MAX {
            ArenaMaxSetting::DoNotSet => None,
            ArenaMaxSetting::Fixed(count) => Some(count),
            ArenaMaxSetting::NumCpus => Some(num_cpus::get() as c_int),
        };

        if let Some(count) = desired_arenas {
            malloc_arena_max(count)
                .map_err(|code| format!("failed (code {}) to set malloc max arena count", code))?;
        }
    }

    if !env_var_present(ENV_VAR_MMAP_THRESHOLD) {
        malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD)
            .map_err(|code| format!("failed (code {}) to set malloc mmap threshold", code))?;
    }

    Ok(())
}

/// Returns `true` if an environment variable is present.
///
/// A variable whose value is not valid Unicode still counts as present. `env::var_os` is used so
/// that the value does not have to be validated or converted just to test for existence.
fn env_var_present(name: &str) -> bool {
    env::var_os(name).is_some()
}

/// Sets the `M_ARENA_MAX` malloc parameter via `mallopt`, i.e. the number of memory arenas that
/// malloc is allowed to create.
///
/// Generally speaking, fewer arenas means less memory fragmentation but more contention between
/// threads.
///
/// Returns `Err` carrying the raw `mallopt` return code when the call does not report success.
///
/// ## Resources
///
/// - https://man7.org/linux/man-pages/man3/mallopt.3.html
fn malloc_arena_max(num_arenas: c_int) -> Result<(), c_int> {
    let return_code = mallopt(M_ARENA_MAX, num_arenas);
    into_result(return_code)
}

/// Uses `mallopt` to set the `M_MMAP_THRESHOLD` value, specifying the threshold where objects of this
/// size or larger are allocated via an `mmap`.
///
/// `threshold` is the new threshold value handed straight to `mallopt`.
///
/// Returns `Err` carrying the raw `mallopt` return code when the call does not report success.
///
/// ## Resources
///
/// - https://man7.org/linux/man-pages/man3/mallopt.3.html
fn malloc_mmap_threshold(threshold: c_int) -> Result<(), c_int> {
    into_result(mallopt(M_MMAP_THRESHOLD, threshold))
}

/// Thin wrapper around `libc::mallopt` that holds `GLOBAL_LOCK` for the duration of the call.
///
/// Returns the raw return code from libc; use `into_result` to interpret it.
fn mallopt(param: c_int, val: c_int) -> c_int {
    // Hold the global lock so this call never runs in parallel with any other non-thread-safe
    // function guarded by the same lock.
    let _guard = GLOBAL_LOCK.lock();
    // SAFETY: only plain integers are passed to and returned from the call (no pointers), and the
    // guard above serialises it with the other libc calls made through this module.
    unsafe { libc::mallopt(param, val) }
}

/// Thin wrapper around `libc::mallinfo` that holds `GLOBAL_LOCK` for the duration of the call.
fn mallinfo() -> libc::mallinfo {
    // Hold the global lock so this call never runs in parallel with any other non-thread-safe
    // function guarded by the same lock.
    let _guard = GLOBAL_LOCK.lock();
    // SAFETY: the call takes no arguments and returns its result by value, and the guard above
    // serialises it with the other libc calls made through this module.
    unsafe { libc::mallinfo() }
}

/// Converts a raw `mallopt` return code into a `Result`.
///
/// A code of exactly `1` is treated as success; any other value is handed back unchanged inside
/// `Err`.
fn into_result(result: c_int) -> Result<(), c_int> {
    match result {
        1 => Ok(()),
        code => Err(code),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Setting a small arena limit must be accepted by the allocator.
    #[test]
    fn malloc_arena_max_does_not_panic() {
        assert_eq!(malloc_arena_max(2), Ok(()));
    }

    /// Setting the production mmap threshold must be accepted by the allocator.
    #[test]
    fn malloc_mmap_threshold_does_not_panic() {
        assert_eq!(malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD), Ok(()));
    }
}
Loading

0 comments on commit 456b313

Please sign in to comment.