Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Commit

Permalink
Fail fast if managed task workers are near-OOM (#1657)
Browse files Browse the repository at this point in the history
- Add `onefuzz::memory::available_bytes()` to enable checking system-wide memory usage
- In managed task worker runs, heuristically check for imminent OOM conditions and try to exit early
  • Loading branch information
ranweiler authored Mar 1, 2022
1 parent 7f93216 commit 1b01981
Show file tree
Hide file tree
Showing 8 changed files with 288 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/agent/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

83 changes: 80 additions & 3 deletions src/agent/onefuzz-agent/src/managed/cmd.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::path::PathBuf;

#[cfg(not(target_os = "macos"))]
use std::time::Duration;

use crate::tasks::config::{CommonConfig, Config};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use std::path::PathBuf;

use crate::tasks::config::{CommonConfig, Config};

// Delay between successive available-memory checks in `out_of_memory`.
#[cfg(not(target_os = "macos"))]
const OOM_CHECK_INTERVAL: Duration = Duration::from_secs(5);

pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
Expand All @@ -13,7 +20,22 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
let config = Config::from_file(config_path, setup_dir)?;

init_telemetry(config.common());
let result = config.run().await;

let min_available_memory_bytes = 1_000_000 * config.common().min_available_memory_mb;

// If the memory limit is 0, this will resolve immediately with an error.
let check_oom = out_of_memory(min_available_memory_bytes);

let result = tokio::select! {
result = config.run() => result,

// Ignore this task if it returns due to a querying error.
Ok(oom) = check_oom => {
// Convert the OOM notification to an error, so we can log it below.
let err = format_err!("out of memory: {} bytes available, {} required", oom.available_bytes, oom.min_bytes);
Err(err)
},
};

if let Err(err) = &result {
error!("error running task: {:?}", err);
Expand All @@ -23,6 +45,61 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
result
}

// Number of consecutive memory-query failures that `out_of_memory` tolerates.
// One more failure than this makes it give up and return the query error.
#[cfg(not(target_os = "macos"))]
const MAX_OOM_QUERY_ERRORS: usize = 5;

/// Periodically check available system memory.
///
/// Resolves with `Ok(OutOfMemory)` as soon as a query reports fewer than
/// `min_bytes` available, so the caller can exit informatively.
///
/// Resolves with `Err` in two cases:
/// - `min_bytes` is 0, since no reading can fall below that minimum;
/// - more than `MAX_OOM_QUERY_ERRORS` queries in a row have failed, in which
///   case the most recent query error is returned.
///
/// Otherwise the future keeps polling, sleeping `OOM_CHECK_INTERVAL` between
/// queries.
///
/// Parameterized to enable future configuration by VMSS.
#[cfg(not(target_os = "macos"))]
async fn out_of_memory(min_bytes: u64) -> Result<OutOfMemory> {
    if min_bytes == 0 {
        bail!("available memory minimum is unreachable");
    }

    // Counts failures in a row; any successful query clears it.
    let mut failed_queries = 0;

    loop {
        match onefuzz::memory::available_bytes() {
            Ok(available_bytes) if available_bytes < min_bytes => {
                return Ok(OutOfMemory {
                    available_bytes,
                    min_bytes,
                });
            }
            Ok(_) => {
                // Enough memory for now; reset so we count consecutive errors.
                failed_queries = 0;
            }
            Err(err) => {
                warn!("error querying system memory usage: {}", err);

                failed_queries += 1;

                if failed_queries > MAX_OOM_QUERY_ERRORS {
                    return Err(err);
                }
            }
        }

        tokio::time::sleep(OOM_CHECK_INTERVAL).await;
    }
}

/// macOS stand-in for the memory check: no query is performed.
///
/// The future resolves immediately with an error and `_min_bytes` is ignored.
#[cfg(target_os = "macos")]
async fn out_of_memory(_min_bytes: u64) -> Result<OutOfMemory> {
    Err(format_err!("out-of-memory check not implemented on macOS"))
}

/// Snapshot taken when available system memory was observed below the
/// configured minimum.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct OutOfMemory {
    /// Available memory, in bytes, reported by the query that tripped the check.
    available_bytes: u64,

    /// Minimum number of available bytes that was required.
    min_bytes: u64,
}

fn init_telemetry(config: &CommonConfig) {
onefuzz_telemetry::set_appinsights_clients(
config.instance_telemetry_key.clone(),
Expand Down
14 changes: 14 additions & 0 deletions src/agent/onefuzz-agent/src/tasks/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ use serde::{self, Deserialize};
use std::{path::PathBuf, sync::Arc, time::Duration};
use uuid::Uuid;

/// Default value for `CommonConfig::min_available_memory_mb`, in megabytes.
const DEFAULT_MIN_AVAILABLE_MEMORY_MB: u64 = 100;

/// Returns `DEFAULT_MIN_AVAILABLE_MEMORY_MB`.
///
/// Exists as a function because it is referenced by name from the
/// `#[serde(default = "...")]` attribute on
/// `CommonConfig::min_available_memory_mb`.
fn default_min_available_memory_mb() -> u64 {
DEFAULT_MIN_AVAILABLE_MEMORY_MB
}

#[derive(Debug, Deserialize, PartialEq, Clone)]
pub enum ContainerType {
#[serde(alias = "inputs")]
Expand All @@ -42,6 +48,14 @@ pub struct CommonConfig {

#[serde(default)]
pub setup_dir: PathBuf,

/// Lower bound on available system memory, in megabytes. If the available
/// memory drops below this limit, the task will exit with an error. This is
/// a fail-fast mechanism to support debugging.
///
/// Can be disabled by setting to 0.
#[serde(default = "default_min_available_memory_mb")]
pub min_available_memory_mb: u64,
}

impl CommonConfig {
Expand Down
1 change: 1 addition & 0 deletions src/agent/onefuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ backoff = { version = "0.3", features = ["tokio"] }
winreg = "0.10"
input-tester = { path = "../input-tester" }
debugger = { path = "../debugger" }
winapi = { version = "0.3", features = ["impl-default", "psapi"] }

[target.'cfg(target_family = "unix")'.dependencies]
cpp_demangle = "0.3"
Expand Down
11 changes: 11 additions & 0 deletions src/agent/onefuzz/examples/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// Prints the currently available system memory, both as a raw byte count and
/// in gigabytes to one decimal place.
///
/// Panics if the memory query fails.
#[cfg(not(target_os = "macos"))]
fn main() {
    let available = onefuzz::memory::available_bytes().unwrap();

    println!(
        "available bytes: {} ({:.1} GB)",
        available,
        (available as f64) * 1e-9
    );
}

// Placeholder entry point for macOS builds; always panics via `unimplemented!()`.
#[cfg(target_os = "macos")]
fn main() {
unimplemented!()
}
1 change: 1 addition & 0 deletions src/agent/onefuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub mod input_tester;
pub mod jitter;
pub mod libfuzzer;
pub mod machine_id;
pub mod memory;
pub mod monitor;
pub mod process;
pub mod sha256;
Expand Down
81 changes: 81 additions & 0 deletions src/agent/onefuzz/src/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#[cfg(target_os = "windows")]
use std::convert::TryFrom;

#[cfg(not(target_os = "macos"))]
use anyhow::Result;

#[cfg(target_os = "linux")]
use regex::Regex;

#[cfg(target_os = "windows")]
use winapi::um::psapi::PERFORMANCE_INFORMATION;

/// Returns the system-wide commit headroom in bytes.
///
/// Computed as `(CommitLimit - CommitTotal) * PageSize` from the values
/// reported by `GetPerformanceInfo`. The subtraction saturates at zero if the
/// commit total exceeds the limit.
///
/// # Errors
///
/// Returns an error if the performance information cannot be queried, or if a
/// reported value does not fit in a `u64`.
#[cfg(target_os = "windows")]
pub fn available_bytes() -> Result<u64> {
    let info = get_performance_info()?;

    // The counters are pointer-sized, so widen to `u64` *before* multiplying.
    // Otherwise the product can overflow on 32-bit targets.
    let pages = u64::try_from(info.CommitLimit.saturating_sub(info.CommitTotal))?;
    let page_size = u64::try_from(info.PageSize)?;

    Ok(pages.saturating_mul(page_size))
}

/// Queries system performance counters via `GetPerformanceInfo`.
///
/// # Errors
///
/// Returns an error if the structure size does not fit in a `u32`, or if the
/// call reports failure; the message then carries the `GetLastError` code in
/// hexadecimal.
#[cfg(target_os = "windows")]
fn get_performance_info() -> Result<PERFORMANCE_INFORMATION> {
    use winapi::shared::minwindef::FALSE;
    use winapi::um::errhandlingapi::GetLastError;
    use winapi::um::psapi::GetPerformanceInfo;

    // Will always fit in a `u32`.
    //
    // https://docs.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-performance_information
    let cb = u32::try_from(std::mem::size_of::<PERFORMANCE_INFORMATION>())?;

    let mut perf_info = PERFORMANCE_INFORMATION::default();

    // SAFETY: `perf_info` is a live, exclusively borrowed
    // `PERFORMANCE_INFORMATION`, and `cb` is exactly its size in bytes.
    let ok = unsafe { GetPerformanceInfo(&mut perf_info, cb) };

    if ok != FALSE {
        return Ok(perf_info);
    }

    // SAFETY: `GetLastError` takes no arguments and only reads the calling
    // thread's last-error value.
    let code = unsafe { GetLastError() };

    bail!("error querying performance information: {:x}", code)
}

/// Returns the `MemAvailable` value from `/proc/meminfo`, converted from kB to
/// bytes using 1024 bytes per kB.
///
/// # Errors
///
/// Returns an error if `/proc/meminfo` cannot be read, or if no parseable
/// `MemAvailable` entry is found in it.
#[cfg(target_os = "linux")]
pub fn available_bytes() -> Result<u64> {
    const BYTES_PER_KB: u64 = 1024;

    let contents = std::fs::read_to_string("/proc/meminfo")?;

    parse_available_kb(&contents).map(|kb| kb * BYTES_PER_KB)
}

/// Extracts the `MemAvailable` figure, in kB, from `/proc/meminfo`-style text.
///
/// The first match of the `AVAILABLE_KB` pattern is used.
///
/// # Errors
///
/// Returns an error if the pattern does not match, or if the captured digits
/// do not parse as a `u64`.
#[cfg(target_os = "linux")]
fn parse_available_kb(meminfo: &str) -> Result<u64> {
    // A missing match and a missing capture group report the same error.
    let digits = AVAILABLE_KB
        .captures(meminfo)
        .and_then(|caps| caps.get(1))
        .map(|m| m.as_str())
        .ok_or_else(|| format_err!("`MemAvailable` not found in `/proc/meminfo`"))?;

    let available_kb: u64 = digits.parse()?;

    Ok(available_kb)
}

// Pattern for the `MemAvailable` entry of `/proc/meminfo`. Capture group 1
// holds the decimal kB value. The pattern is not anchored, so the label may be
// preceded by other text.
#[cfg(target_os = "linux")]
lazy_static::lazy_static! {
static ref AVAILABLE_KB: Regex = Regex::new(r"MemAvailable:\s*(\d+) kB").unwrap();
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests_linux;
99 changes: 99 additions & 0 deletions src/agent/onefuzz/src/memory/tests_linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use anyhow::Result;

use super::parse_available_kb;

// Checks that `parse_available_kb` extracts the `MemAvailable` value from a
// full `/proc/meminfo` sample and from single-line inputs, including inputs
// with leading whitespace or leading text. When the entry appears twice, the
// first occurrence is the one returned.
#[test]
fn test_parse_available_kb() -> Result<()> {
assert_eq!(parse_available_kb(MEMINFO)?, 1001092);
assert_eq!(parse_available_kb("MemAvailable: 1001092 kB")?, 1001092);
assert_eq!(
parse_available_kb("MemAvailable: 1001092 kB\tMemAvailable: 123 kB")?,
1001092
);
assert_eq!(
parse_available_kb(" MemAvailable: 1001092 kB")?,
1001092
);
assert_eq!(parse_available_kb(" MemAvailable:1001092 kB")?, 1001092);
assert_eq!(parse_available_kb(" MemAvailable: 1001092 kB")?, 1001092);
assert_eq!(
parse_available_kb(" MemAvailable: 1001092 kB")?,
1001092
);
assert_eq!(
parse_available_kb("extra MemAvailable: 1001092 kB")?,
1001092
);
assert_eq!(
parse_available_kb("extra MemAvailable:1001092 kB")?,
1001092
);
assert_eq!(
parse_available_kb("extra MemAvailable: 1001092 kB")?,
1001092
);
assert_eq!(
parse_available_kb("extra MemAvailable: 1001092 kB")?,
1001092
);

Ok(())
}

// Inputs that lack the `MemAvailable:` label, the numeric value, or the `kB`
// unit must be rejected with an error.
#[test]
fn test_parse_available_kb_missing() {
assert!(parse_available_kb("").is_err());
assert!(parse_available_kb("1001092").is_err());
assert!(parse_available_kb("MemAvailable: ").is_err());
assert!(parse_available_kb("MemAvailable: 1001092 MB").is_err());
assert!(parse_available_kb("MemFree: 198308 kB").is_err());
}

// Sample `/proc/meminfo` contents used as a parsing fixture. Its
// `MemAvailable` entry is 1001092 kB.
const MEMINFO: &str = "MemTotal: 16036984 kB
MemFree: 198308 kB
MemAvailable: 1001092 kB
Buffers: 521880 kB
Cached: 459416 kB
SwapCached: 1580 kB
Active: 830140 kB
Inactive: 206728 kB
Active(anon): 22492 kB
Inactive(anon): 28876 kB
Active(file): 807648 kB
Inactive(file): 177852 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 4194300 kB
SwapFree: 4181440 kB
Dirty: 8 kB
Writeback: 0 kB
AnonPages: 54368 kB
Mapped: 31344 kB
Shmem: 792 kB
Slab: 192900 kB
SReclaimable: 131056 kB
SUnreclaim: 61844 kB
KernelStack: 3104 kB
PageTables: 5324 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 12212792 kB
Committed_AS: 575108 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 0 kB
VmallocChunk: 0 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
ShmemPmdMapped: 0 kB
CmaTotal: 0 kB
CmaFree: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 152880 kB
DirectMap2M: 4696064 kB
DirectMap1G: 11534336 kB";

0 comments on commit 1b01981

Please sign in to comment.