Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Fail fast if managed task workers are near-OOM #1657

Merged
merged 21 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agent/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 46 additions & 1 deletion src/agent/onefuzz-agent/src/managed/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ use crate::tasks::config::{CommonConfig, Config};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use std::path::PathBuf;
use std::time::Duration;

// Minimum amount of available system memory, in bytes: 100 MB (decimal
// megabytes). Passed to `out_of_memory` as its threshold.
const MIN_AVAILABLE_BYTES: u64 = 100 * 1_000_000;

// Delay between successive available-memory checks in `out_of_memory`.
const OOM_CHECK_INTERVAL: Duration = Duration::from_secs(5);

pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
Expand All @@ -13,7 +19,17 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
let config = Config::from_file(config_path, setup_dir)?;

init_telemetry(config.common());
let result = config.run().await;

let result = tokio::select! {
result = config.run() => result,

// Ignore this task if it returns due to a querying error.
Ok(oom) = out_of_memory(MIN_AVAILABLE_BYTES) => {
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
// Convert the OOM notification to an error, so we can log it below.
let err = format_err!("out of memory: {} bytes available, {} required", oom.available_bytes, oom.min_bytes);
Err(err)
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
},
};

if let Err(err) = &result {
error!("error running task: {:?}", err);
Expand All @@ -23,6 +39,35 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> {
result
}

// Periodically check available system memory.
//
// If available memory drops below the minimum, exit informatively.
async fn out_of_memory(min_bytes: u64) -> Result<OutOfMemory> {
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
loop {
match onefuzz::memory::available_bytes() {
Ok(available_bytes) => {
if available_bytes < min_bytes {
return Ok(OutOfMemory {
available_bytes,
min_bytes,
});
}
}
Err(err) => {
warn!("error querying system memory usage: {}", err);
return Err(err);
}
}
ranweiler marked this conversation as resolved.
Show resolved Hide resolved

tokio::time::sleep(OOM_CHECK_INTERVAL).await;
}
}

// Describes a detected low-memory condition: the available-memory reading
// that triggered it, and the threshold it was compared against.
struct OutOfMemory {
// Available system memory, in bytes, at the time of the check.
available_bytes: u64,
// Minimum number of available bytes that was required.
min_bytes: u64,
}

fn init_telemetry(config: &CommonConfig) {
onefuzz_telemetry::set_appinsights_clients(
config.instance_telemetry_key.clone(),
Expand Down
1 change: 1 addition & 0 deletions src/agent/onefuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ backoff = { version = "0.3", features = ["tokio"] }
winreg = "0.10"
input-tester = { path = "../input-tester" }
debugger = { path = "../debugger" }
winapi = { version = "0.3", features = ["impl-default", "psapi"] }

[target.'cfg(target_family = "unix")'.dependencies]
cpp_demangle = "0.3"
Expand Down
5 changes: 5 additions & 0 deletions src/agent/onefuzz/examples/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
fn main() {
ranweiler marked this conversation as resolved.
Show resolved Hide resolved
let bytes = onefuzz::memory::available_bytes().unwrap();
let gb = (bytes as f64) * 1e-9;
println!("available bytes: {} ({:.1} GB)", bytes, gb);
}
1 change: 1 addition & 0 deletions src/agent/onefuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub mod input_tester;
pub mod jitter;
pub mod libfuzzer;
pub mod machine_id;
pub mod memory;
pub mod monitor;
pub mod process;
pub mod sha256;
Expand Down
80 changes: 80 additions & 0 deletions src/agent/onefuzz/src/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#[cfg(target_os = "windows")]
use std::convert::TryFrom;

use anyhow::Result;

#[cfg(target_os = "linux")]
use regex::Regex;

#[cfg(target_os = "windows")]
use winapi::um::psapi::PERFORMANCE_INFORMATION;

/// Returns the number of bytes that can still be committed system-wide.
///
/// Computed from the performance counters as
/// `(CommitLimit - CommitTotal) * PageSize`. The subtraction saturates at
/// zero, so a commit total above the limit reports `0` bytes available.
///
/// # Errors
///
/// Returns an error if the performance query fails, or if a counter cannot be
/// represented as a `u64`.
#[cfg(target_os = "windows")]
pub fn available_bytes() -> Result<u64> {
    let info = get_performance_info()?;

    // Widen to `u64` *before* multiplying: the counters are pointer-sized, so
    // on a 32-bit target `pages * page_size` could overflow `usize` even though
    // the byte count fits comfortably in a `u64`.
    let available_pages = u64::try_from(info.CommitLimit.saturating_sub(info.CommitTotal))?;
    let page_size = u64::try_from(info.PageSize)?;

    // Saturate instead of overflowing: an absurdly large product can only mean
    // "plenty of memory", which is the harmless direction for a low-memory check.
    Ok(available_pages.saturating_mul(page_size))
}

/// Queries system performance counters via the Win32 `GetPerformanceInfo` API.
///
/// # Errors
///
/// Returns an error if the size of `PERFORMANCE_INFORMATION` does not fit in a
/// `u32`, or if `GetPerformanceInfo` reports failure (the error message then
/// carries the `GetLastError` code in hexadecimal).
#[cfg(target_os = "windows")]
fn get_performance_info() -> Result<PERFORMANCE_INFORMATION> {
    use winapi::shared::minwindef::FALSE;
    use winapi::um::errhandlingapi::GetLastError;
    use winapi::um::psapi::GetPerformanceInfo;

    // Will always fit in a `u32`.
    //
    // https://docs.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-performance_information
    //
    // Computed outside the `unsafe` block: this is ordinary safe, fallible code.
    let size = u32::try_from(std::mem::size_of::<PERFORMANCE_INFORMATION>())?;

    let mut info = PERFORMANCE_INFORMATION::default();

    // SAFETY: `info` is a live, exclusively borrowed `PERFORMANCE_INFORMATION`
    // and `size` is exactly the size of that struct, so the callee is told the
    // true extent of the buffer it may write to.
    let success = unsafe { GetPerformanceInfo(&mut info, size) };

    if success == FALSE {
        // Read the error code immediately after the failed call, before any
        // other API call has a chance to overwrite it.
        //
        // SAFETY: `GetLastError` takes no arguments and has no preconditions.
        let code = unsafe { GetLastError() };
        bail!("error querying performance information: {:x}", code);
    }

    Ok(info)
}

/// Returns the `MemAvailable` value from `/proc/meminfo`, converted to bytes.
///
/// The file reports the value in kB; it is scaled by 1024 here.
///
/// # Errors
///
/// Returns an error if `/proc/meminfo` cannot be read, or if no parseable
/// `MemAvailable` entry is found in it.
#[cfg(target_os = "linux")]
pub fn available_bytes() -> Result<u64> {
    const BYTES_PER_KB: u64 = 1024;

    let contents = std::fs::read_to_string("/proc/meminfo")?;

    parse_available_kb(&contents).map(|kb| kb * BYTES_PER_KB)
}

/// Extracts the `MemAvailable` value, in kB, from `/proc/meminfo`-style text.
///
/// The `AVAILABLE_KB` pattern is searched for anywhere in `meminfo`; when it
/// occurs more than once, the first occurrence is used.
///
/// # Errors
///
/// Returns an error if the pattern does not match, or if the captured digits
/// do not parse as a `u64`.
#[cfg(target_os = "linux")]
fn parse_available_kb(meminfo: &str) -> Result<u64> {
    // `meminfo` is already a `&str`; pass it through without re-borrowing.
    let captures = AVAILABLE_KB
        .captures(meminfo)
        .ok_or_else(|| format_err!("`MemAvailable` not found in `/proc/meminfo`"))?;

    // Group 1 holds the run of decimal digits.
    let digits = captures
        .get(1)
        .ok_or_else(|| format_err!("`MemAvailable` not found in `/proc/meminfo`"))?
        .as_str();

    Ok(digits.parse::<u64>()?)
}

// Lazily-compiled pattern for the `MemAvailable` entry of `/proc/meminfo`-style
// text. Capture group 1 is the run of decimal digits that precedes ` kB`.
//
// The pattern is not anchored, so it can match at any position in the input.
// The `unwrap` is applied to compiling a fixed pattern literal.
#[cfg(target_os = "linux")]
lazy_static::lazy_static! {
static ref AVAILABLE_KB: Regex = Regex::new(r"MemAvailable:\s*(\d+) kB").unwrap();
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests_linux;
99 changes: 99 additions & 0 deletions src/agent/onefuzz/src/memory/tests_linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use anyhow::Result;

use super::parse_available_kb;

// Every input below must yield the same `MemAvailable` value.
//
// Covers the full `MEMINFO` fixture, a bare entry, a repeated entry (the first
// occurrence is the one expected), and entries preceded by whitespace or by
// other text.
//
// NOTE(review): several inputs are textually identical here; presumably they
// were meant to differ in whitespace -- confirm against the intended cases.
#[test]
fn test_parse_available_kb() -> Result<()> {
    const EXPECTED_KB: u64 = 1001092;

    let inputs = [
        MEMINFO,
        "MemAvailable: 1001092 kB",
        "MemAvailable: 1001092 kB\tMemAvailable: 123 kB",
        " MemAvailable: 1001092 kB",
        " MemAvailable:1001092 kB",
        " MemAvailable: 1001092 kB",
        " MemAvailable: 1001092 kB",
        "extra MemAvailable: 1001092 kB",
        "extra MemAvailable:1001092 kB",
        "extra MemAvailable: 1001092 kB",
        "extra MemAvailable: 1001092 kB",
    ];

    for input in inputs.iter() {
        assert_eq!(parse_available_kb(input)?, EXPECTED_KB);
    }

    Ok(())
}

// Inputs with no parseable `MemAvailable` entry must be rejected: empty text,
// a bare number, a label with no value, the wrong unit, and a different field.
#[test]
fn test_parse_available_kb_missing() {
    let rejected = [
        "",
        "1001092",
        "MemAvailable: ",
        "MemAvailable: 1001092 MB",
        "MemFree: 198308 kB",
    ];

    for input in rejected.iter() {
        assert!(parse_available_kb(input).is_err());
    }
}

// Sample `/proc/meminfo`-style text used as a test fixture. Its `MemAvailable`
// entry is 1001092 kB, the value `test_parse_available_kb` expects.
const MEMINFO: &str = "MemTotal: 16036984 kB
MemFree: 198308 kB
MemAvailable: 1001092 kB
Buffers: 521880 kB
Cached: 459416 kB
SwapCached: 1580 kB
Active: 830140 kB
Inactive: 206728 kB
Active(anon): 22492 kB
Inactive(anon): 28876 kB
Active(file): 807648 kB
Inactive(file): 177852 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 4194300 kB
SwapFree: 4181440 kB
Dirty: 8 kB
Writeback: 0 kB
AnonPages: 54368 kB
Mapped: 31344 kB
Shmem: 792 kB
Slab: 192900 kB
SReclaimable: 131056 kB
SUnreclaim: 61844 kB
KernelStack: 3104 kB
PageTables: 5324 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 12212792 kB
Committed_AS: 575108 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 0 kB
VmallocChunk: 0 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
ShmemPmdMapped: 0 kB
CmaTotal: 0 kB
CmaFree: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 152880 kB
DirectMap2M: 4696064 kB
DirectMap1G: 11534336 kB";