Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Tune GNU malloc #2299

Closed
wants to merge 42 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5298f50
First impl of malloc_trim
paulhauner Apr 3, 2021
ba95d0d
Remove build.rs, improve docs
paulhauner Apr 3, 2021
128a5bd
Set mmap threshold to 1mb
paulhauner Apr 3, 2021
6182f49
Increase mmap threshold to 2mb
paulhauner Apr 3, 2021
d9b8d6b
Refactor malloc_ctl API
paulhauner Apr 6, 2021
20d9078
Ignore clippy lint
paulhauner Apr 6, 2021
008826b
Rename default -> optimal
paulhauner Apr 6, 2021
c158c13
Make non-mac functions pub
paulhauner Apr 6, 2021
e9ee4c8
Add musl conditionals
paulhauner Apr 7, 2021
5ce393c
Add malloc metrics
paulhauner Apr 8, 2021
72888eb
Add missed metric
paulhauner Apr 8, 2021
ac9aa3c
Increase malloc_trim interval to 5m
paulhauner Apr 8, 2021
a636a49
Hold thread join handle
paulhauner Apr 8, 2021
035ab39
Rename malloc_ctl -> malloc_utils
paulhauner Apr 13, 2021
3a8f7df
Remove malloc stats endpoint
paulhauner Apr 13, 2021
7e0348c
Merge in `attn-smallvec` (#2182)
paulhauner Apr 13, 2021
27f1fd0
Ensure glibc isn't compiled on non-linux
paulhauner Apr 13, 2021
35d3e12
Tidy conditional compilation
paulhauner Apr 13, 2021
58a89a1
Fix clippy lint
paulhauner Apr 13, 2021
b7b6ac4
Disable trimmer
paulhauner Apr 13, 2021
f53174a
Revert "Disable trimmer"
paulhauner Apr 14, 2021
c57265e
Set arenas to 4
paulhauner Apr 14, 2021
443c7e8
Un-revert "Disable trimmer"
paulhauner Apr 18, 2021
38d851f
Set arena max to 0
paulhauner Apr 26, 2021
a6bd680
Disable call to set arena max
paulhauner Apr 26, 2021
acdecf6
Set arena max to 1
paulhauner May 4, 2021
c2ed09f
Merge branch 'unstable' into malloc_trim
paulhauner May 10, 2021
34193c2
Merge branch 'unstable' into malloc_trim
paulhauner May 25, 2021
4762a89
Set arena max to 4
paulhauner May 26, 2021
3163796
Set arena to CPU count
paulhauner May 26, 2021
9a892d2
Remove trim, eprintln
paulhauner May 27, 2021
00a80f9
Tidy
paulhauner May 28, 2021
268dc4e
Add malloc threshold test
paulhauner May 28, 2021
4a90729
Revery changes to bitfield
paulhauner May 28, 2021
7d87746
Freshen Cargo.lock
paulhauner May 28, 2021
a1ca131
Add CLI tests
paulhauner May 28, 2021
88306c6
Add comment about malloc2
paulhauner May 28, 2021
fca7488
Use libc::mallopt, protect against concurrency
paulhauner May 28, 2021
ddcdda9
Gate malloc metrics behind CLI flag
paulhauner May 28, 2021
cb07b74
Fix broken metrics tests
paulhauner May 28, 2021
4a679e6
Drop mmap threshold to 1mb
paulhauner May 28, 2021
eea4186
Set mmap threshold back to 2mb
paulhauner May 28, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ members = [
"common/lockfile",
"common/logging",
"common/lru_cache",
"common/malloc_utils",
"common/remote_signer_consumer",
"common/sensitive_url",
"common/slot_clock",
Expand Down
1 change: 1 addition & 0 deletions beacon_node/http_metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ lazy_static = "1.4.0"
eth2 = { path = "../../common/eth2" }
lighthouse_version = { path = "../../common/lighthouse_version" }
warp_utils = { path = "../../common/warp_utils" }
malloc_utils = { path = "../../common/malloc_utils" }

[dev-dependencies]
tokio = { version = "1.1.0", features = ["sync"] }
Expand Down
2 changes: 2 additions & 0 deletions beacon_node/http_metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub struct Config {
pub listen_addr: Ipv4Addr,
pub listen_port: u16,
pub allow_origin: Option<String>,
pub allocator_metrics_enabled: bool,
}

impl Default for Config {
Expand All @@ -58,6 +59,7 @@ impl Default for Config {
listen_addr: Ipv4Addr::new(127, 0, 0, 1),
listen_port: 5054,
allow_origin: None,
allocator_metrics_enabled: true,
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions beacon_node/http_metrics/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::Context;
use beacon_chain::BeaconChainTypes;
use lighthouse_metrics::{Encoder, TextEncoder};
use malloc_utils::scrape_allocator_metrics;

pub use lighthouse_metrics::*;

Expand Down Expand Up @@ -41,6 +42,12 @@ pub fn gather_prometheus_metrics<T: BeaconChainTypes>(

warp_utils::metrics::scrape_health_metrics();

// It's important to ensure these metrics are explicitly enabled in the case that users aren't
// using glibc and this function causes panics.
if ctx.config.allocator_metrics_enabled {
scrape_allocator_metrics();
}

encoder
.encode(&lighthouse_metrics::gather(), &mut buffer)
.unwrap();
Expand Down
1 change: 1 addition & 0 deletions beacon_node/http_metrics/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ async fn returns_200_ok() {
listen_addr: Ipv4Addr::new(127, 0, 0, 1),
listen_port: 0,
allow_origin: None,
allocator_metrics_enabled: true,
},
chain: None,
db_path: None,
Expand Down
7 changes: 6 additions & 1 deletion beacon_node/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use clap::ArgMatches;
use clap_utils::BAD_TESTNET_DIR_MESSAGE;
use clap_utils::{flags::DISABLE_MALLOC_TUNING_FLAG, BAD_TESTNET_DIR_MESSAGE};
use client::{ClientConfig, ClientGenesis};
use directory::{DEFAULT_BEACON_NODE_DIR, DEFAULT_NETWORK_DIR, DEFAULT_ROOT_DIR};
use eth2_libp2p::{multiaddr::Protocol, Enr, Multiaddr, NetworkConfig, PeerIdSerialized};
Expand Down Expand Up @@ -145,6 +145,11 @@ pub fn get_config<E: EthSpec>(
);
}

// Do not scrape for malloc metrics if we've disabled tuning malloc as it may cause panics.
if cli_args.is_present(DISABLE_MALLOC_TUNING_FLAG) {
client_config.http_metrics.allocator_metrics_enabled = false;
}

/*
* Eth1
*/
Expand Down
3 changes: 3 additions & 0 deletions common/clap_utils/src/flags.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! CLI flags used across the Lighthouse code base can be located here.

pub const DISABLE_MALLOC_TUNING_FLAG: &str = "disable-malloc-tuning";
2 changes: 2 additions & 0 deletions common/clap_utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use ssz::Decode;
use std::path::PathBuf;
use std::str::FromStr;

pub mod flags;

pub const BAD_TESTNET_DIR_MESSAGE: &str = "The hard-coded testnet directory was invalid. \
This happens when Lighthouse is migrating between spec versions \
or when there is no default public network to connect to. \
Expand Down
14 changes: 14 additions & 0 deletions common/malloc_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "malloc_utils"
version = "0.1.0"
authors = ["Paul Hauner <paul@paulhauner.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lighthouse_metrics = { path = "../lighthouse_metrics" }
lazy_static = "1.4.0"
libc = "0.2.79"
parking_lot = "0.11.0"
num_cpus = "1.13.0"
210 changes: 210 additions & 0 deletions common/malloc_utils/src/glibc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
//! Contains functions for tuning and controlling "The GNU Allocator", included in the `glibc`
//! library.
//!
//! https://www.gnu.org/software/libc/manual/html_node/The-GNU-Allocator.html
//!
//! These functions are generally only suitable for Linux systems.
use lazy_static::lazy_static;
use lighthouse_metrics::*;
use parking_lot::Mutex;
use std::env;
use std::os::raw::c_int;
use std::result::Result;

/// The value to be provided to `malloc_mmap_threshold`.
///
/// Value chosen so that values of the validators tree hash cache will *not* be allocated via
/// `mmap`.
///
/// The size of a single chunk is:
///
/// NODES_PER_VALIDATOR * VALIDATORS_PER_ARENA * 32 = 15 * 4096 * 32 = 1.875 MiB
///
/// The chosen value (2 MiB) sits just above that chunk size.
const OPTIMAL_MMAP_THRESHOLD: c_int = 2 * 1_024 * 1_024;

/// The maximum number of arenas allowed to be created by malloc.
///
/// See `ArenaMaxSetting` docs for details.
///
/// Only consulted by `configure_glibc_malloc` when the `MALLOC_ARENA_MAX` environment variable
/// is absent.
const OPTIMAL_ARENA_MAX: ArenaMaxSetting = ArenaMaxSetting::NumCpus;

/// Constants used to configure malloc internals.
///
/// These are the `mallopt` parameter identifiers defined in glibc's `malloc.h`. Note that
/// `M_MMAP_THRESHOLD` is `-3`; the neighbouring value `-4` is `M_MMAP_MAX` (the maximum number of
/// simultaneously mmap-ed chunks), which is a different tunable.
///
/// Source:
///
/// https://github.com/lattera/glibc/blob/895ef79e04a953cac1493863bcae29ad85657ee1/malloc/malloc.h#L115-L123
const M_MMAP_THRESHOLD: c_int = -3;
const M_ARENA_MAX: c_int = -8;

/// Environment variables used to configure malloc.
///
/// Source:
///
/// https://man7.org/linux/man-pages/man3/mallopt.3.html
///
/// When one of these variables is present in the environment, `configure_glibc_malloc` skips the
/// corresponding `mallopt` call (presumably so that a user-supplied value is not overridden).
const ENV_VAR_ARENA_MAX: &str = "MALLOC_ARENA_MAX";
/// Name of the mmap threshold environment variable; the trailing underscore is part of the name.
const ENV_VAR_MMAP_THRESHOLD: &str = "MALLOC_MMAP_THRESHOLD_";

/// Describes how `configure_glibc_malloc` picks the value it passes to `malloc_arena_max`.
// `dead_code` is allowed because only the variant assigned to `OPTIMAL_ARENA_MAX` is ever
// constructed; the remaining variants are presumably kept so the setting is easy to change.
#[allow(dead_code)]
enum ArenaMaxSetting {
/// Do not set any value for MALLOC_ARENA_MAX, leave it as default.
DoNotSet,
/// Set a fixed value.
Fixed(c_int),
/// Read the number of CPUs at runtime and use that value.
NumCpus,
}

lazy_static! {
    /// Lock taken by the `mallopt` and `mallinfo` wrappers in this module so that the underlying
    /// non-thread-safe libc calls are never executed in parallel with each other.
    pub static ref GLOBAL_LOCK: Mutex<()> = Mutex::new(());
}

// Metrics for the malloc. For more information, see:
//
// https://man7.org/linux/man-pages/man3/mallinfo.3.html
//
// Each gauge below is named after a field of the struct returned by `mallinfo` and is refreshed
// with that field's value by `scrape_mallinfo_metrics`.
lazy_static! {
pub static ref MALLINFO_ARENA: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_arena",
"The total amount of memory allocated by means other than mmap(2). \
This figure includes both in-use blocks and blocks on the free list.",
);
pub static ref MALLINFO_ORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_ordblks",
"The number of ordinary (i.e., non-fastbin) free blocks.",
);
pub static ref MALLINFO_SMBLKS: lighthouse_metrics::Result<IntGauge> =
try_create_int_gauge("mallinfo_smblks", "The number of fastbin free blocks.",);
pub static ref MALLINFO_HBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_hblks",
"The number of blocks currently allocated using mmap.",
);
pub static ref MALLINFO_HBLKHD: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_hblkhd",
"The number of bytes in blocks currently allocated using mmap.",
);
pub static ref MALLINFO_FSMBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_fsmblks",
"The total number of bytes in fastbin free blocks.",
);
pub static ref MALLINFO_UORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_uordblks",
"The total number of bytes used by in-use allocations.",
);
pub static ref MALLINFO_FORDBLKS: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_fordblks",
"The total number of bytes in free blocks.",
);
pub static ref MALLINFO_KEEPCOST: lighthouse_metrics::Result<IntGauge> = try_create_int_gauge(
"mallinfo_keepcost",
"The total amount of releasable free space at the top of the heap..",
);
}

/// Calls `mallinfo` and updates Prometheus metrics with the results.
pub fn scrape_mallinfo_metrics() {
// The docs for this function say it is thread-unsafe since it may return inconsistent results.
// Since these are just metrics it's not a concern to us if they're sometimes inconsistent.
michaelsproul marked this conversation as resolved.
Show resolved Hide resolved
//
// There exists a `malloc2` function, however it was release in February 2021 and this seems too
// recent to rely on.
//
// Docs:
//
// https://man7.org/linux/man-pages/man3/mallinfo.3.html
let mallinfo = mallinfo();

set_gauge(&MALLINFO_ARENA, mallinfo.arena as i64);
set_gauge(&MALLINFO_ORDBLKS, mallinfo.ordblks as i64);
set_gauge(&MALLINFO_SMBLKS, mallinfo.smblks as i64);
set_gauge(&MALLINFO_HBLKS, mallinfo.hblks as i64);
set_gauge(&MALLINFO_HBLKHD, mallinfo.hblkhd as i64);
set_gauge(&MALLINFO_FSMBLKS, mallinfo.fsmblks as i64);
set_gauge(&MALLINFO_UORDBLKS, mallinfo.uordblks as i64);
set_gauge(&MALLINFO_FORDBLKS, mallinfo.fordblks as i64);
set_gauge(&MALLINFO_KEEPCOST, mallinfo.keepcost as i64);
}

/// Applies the malloc tuning chosen by this module.
///
/// Two settings are handled, in this order:
///
/// 1. The arena limit, derived from `OPTIMAL_ARENA_MAX`.
/// 2. The mmap threshold, taken from `OPTIMAL_MMAP_THRESHOLD`.
///
/// A setting is skipped entirely when its corresponding environment variable
/// (`ENV_VAR_ARENA_MAX` / `ENV_VAR_MMAP_THRESHOLD`) is present.
///
/// ## Errors
///
/// Returns a message containing the code reported by `mallopt` as soon as one of the calls fails;
/// any later setting is then not attempted.
pub fn configure_glibc_malloc() -> Result<(), String> {
    if !env_var_present(ENV_VAR_ARENA_MAX) {
        let requested_arenas = match OPTIMAL_ARENA_MAX {
            ArenaMaxSetting::DoNotSet => None,
            ArenaMaxSetting::Fixed(n) => Some(n),
            ArenaMaxSetting::NumCpus => Some(num_cpus::get() as c_int),
        };

        if let Some(count) = requested_arenas {
            malloc_arena_max(count)
                .map_err(|code| format!("failed (code {}) to set malloc max arena count", code))?;
        }
    }

    if !env_var_present(ENV_VAR_MMAP_THRESHOLD) {
        malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD)
            .map_err(|code| format!("failed (code {}) to set malloc mmap threshold", code))?;
    }

    Ok(())
}

/// Reports whether the environment variable `name` is set.
///
/// A variable counts as present even when its value is empty or is not valid Unicode; only a
/// variable that is missing altogether yields `false`.
fn env_var_present(name: &str) -> bool {
    env::var_os(name).is_some()
}

/// Sets the `M_ARENA_MAX` malloc parameter via `mallopt`, capping how many memory arenas malloc
/// may create.
///
/// Fewer arenas generally means less memory fragmentation, at the price of more contention
/// between threads competing for the same arena.
///
/// Returns `Err` carrying the raw `mallopt` return value when the call does not report success.
///
/// ## Resources
///
/// - https://man7.org/linux/man-pages/man3/mallopt.3.html
fn malloc_arena_max(num_arenas: c_int) -> Result<(), c_int> {
    let status = mallopt(M_ARENA_MAX, num_arenas);
    into_result(status)
}

/// Uses `mallopt` to set the `M_MMAP_THRESHOLD` value, specifying the threshold where objects of this
/// size or larger are allocated via an `mmap`.
///
/// `threshold` is the size limit (in bytes) handed to `mallopt`.
///
/// Returns `Err` carrying the raw `mallopt` return value when the call does not report success.
///
/// ## Resources
///
/// - https://man7.org/linux/man-pages/man3/mallopt.3.html
fn malloc_mmap_threshold(threshold: c_int) -> Result<(), c_int> {
    into_result(mallopt(M_MMAP_THRESHOLD, threshold))
}

/// Calls `libc::mallopt(param, val)` while holding `GLOBAL_LOCK` and returns its raw result.
fn mallopt(param: c_int, val: c_int) -> c_int {
    // Serialise against every other non-thread-safe libc call made by this module. The guard
    // must stay bound to a named variable so that it lives until the end of the function.
    let _guard = GLOBAL_LOCK.lock();

    // SAFETY: `mallopt` receives two plain integers and no pointers; concurrent use from this
    // module is prevented by the guard held above.
    unsafe { libc::mallopt(param, val) }
}

/// Calls `libc::mallinfo()` while holding `GLOBAL_LOCK` and returns the resulting struct.
fn mallinfo() -> libc::mallinfo {
    // Serialise against every other non-thread-safe libc call made by this module. The guard
    // must stay bound to a named variable so that it lives until the end of the function.
    let _guard = GLOBAL_LOCK.lock();

    // SAFETY: `mallinfo` takes no arguments and returns its result by value; concurrent use
    // from this module is prevented by the guard held above.
    unsafe { libc::mallinfo() }
}

/// Converts a raw `mallopt` return value into a `Result`.
///
/// A value of exactly `1` maps to `Ok(())`; every other value is handed back unchanged inside
/// `Err`.
fn into_result(result: c_int) -> Result<(), c_int> {
    match result {
        1 => Ok(()),
        code => Err(code),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Setting the arena limit must be reported as a success by `mallopt`.
    #[test]
    fn malloc_arena_max_does_not_panic() {
        assert_eq!(malloc_arena_max(2), Ok(()));
    }

    /// Setting the mmap threshold to the value used in production must be reported as a success
    /// by `mallopt`.
    #[test]
    fn malloc_mmap_threshold_does_not_panic() {
        assert_eq!(malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD), Ok(()));
    }
}
Loading