Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve linux process scan using pagemap #96

Merged
merged 7 commits into from
Dec 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 78 additions & 21 deletions boreal-test-helpers/src/main.rs
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@ fn main() {
"stack" => stack(),
"max_fetched_region_size" => max_fetched_region_size(),
"memory_chunk_size" => memory_chunk_size(),
"file_copy_on_write" => file_copy_on_write(),
_ => panic!("unknown arg {}", arg),
}
}
@@ -43,24 +44,29 @@ fn stack() {
std::hint::black_box(STATIC_PAYLOAD);
}

// Unit used as the "page size" when sizing and placing the test regions
// (4 MiB). This is not the system page size, but it should always be a
// multiple of it.
const PAGE_SIZE: usize = 4 * 1024 * 1024;

fn max_fetched_region_size() {
// The searched string is "Dwb6r5gd", and the fetch limit is 20 bytes
// The searched string is "Dwb6r5gd", and the initial fetch limit is PAGE_SIZE (4MB)
let pattern = b"Kxm9}:hk";

// One page will contain the whole string.
// This is "Dwb6r5gd"
let region1 = Region::new(b"Kxm9}:hk");
let region1 = Region::new(pattern);

// This one will still match, since it is exactly 20 bytes
// This is "123456789 Dwb6r5gd"
let region2 = Region::new(b">=<;:9876/Kxm9}:hk");
// This one will still match, since it is exactly PAGE_SIZE (4MB) long
let mut region2 = Region::zeroed(PAGE_SIZE);
region2.write_at(PAGE_SIZE - 8, pattern);

// This one will not match as it gets cut
// This is "123456789 12345 Dwb6r5gd"
let region3 = Region::new(b">=<;:9876/>=<;:/Kxm9}:hk");
// This one will not match as it gets cut in the middle
let mut region3 = Region::zeroed(PAGE_SIZE + 10);
region3.write_at(PAGE_SIZE - 4, pattern);

// Past the limit so will not get matched
// This is "123456789 123456789 12345 Dwb6r5gd"
let region4 = Region::new(b">=<;:9876/>=<;:9876/>=<;:/Kxm9}:hk");
let mut region4 = Region::zeroed(PAGE_SIZE + 500);
region4.write_at(PAGE_SIZE + 200, pattern);

// Send the base addresses of the region back to the test
println!("{:x}", region1.addr());
@@ -73,23 +79,23 @@ fn max_fetched_region_size() {
}

fn memory_chunk_size() {
// The searched string is "T5aI0uhg7S", and the chunk size is 10MB
let tenmb = 10 * 1024 * 1024;
// The searched string is "T5aI0uhg7S", and the chunk size is 2 or 3 times PAGE_SIZE (4MB)
let pattern = b"[:nF?zgh8\\";

// One page will contain the string, right at the end.
let mut region1 = Region::zeroed(tenmb);
region1.write_at(tenmb - 10, b"[:nF?zgh8\\");
let mut region1 = Region::zeroed(PAGE_SIZE);
region1.write_at(PAGE_SIZE - 10, pattern);

// One page will split the string in two
let mut region2 = Region::zeroed(tenmb + 20);
region2.write_at(tenmb - 5, b"[:nF?zgh8\\");
let mut region2 = Region::zeroed(2 * PAGE_SIZE + 20);
region2.write_at(2 * PAGE_SIZE - 5, pattern);

// One page will contain the string, twice, in two separate chunks
let mut region3 = Region::zeroed(tenmb * 3);
// First one is right at the 15MB limit
region3.write_at(tenmb + 5 * 1024 * 1024 - 5, b"[:nF?zgh8\\");
// Second one is after 20MB
region3.write_at(2 * tenmb + 4096, b"[:nF?zgh8\\");
let mut region3 = Region::zeroed(PAGE_SIZE * 5);
// First one is right at the 3 * PAGE_SIZE limit
region3.write_at(3 * PAGE_SIZE - 5, pattern);
// Second one is after 4 pages
region3.write_at(4 * PAGE_SIZE + 4096, pattern);

// Send the base addresses of the region back to the test
println!("{:x}", region1.addr());
@@ -100,6 +106,37 @@ fn memory_chunk_size() {
std::thread::sleep(std::time::Duration::from_secs(500));
}

/// Test scenario for scanning private (copy-on-write) file mappings.
///
/// Two temporary files are mapped privately and partly overwritten in memory,
/// so that the file contents and the process memory differ. The base address
/// of each mapping is printed (hexadecimal, one per line), followed by
/// `ready`, and the process then sleeps so that it can be scanned.
///
/// Patterns are stored xor-ed with 15 (see `xor_bytes_into`), so that the
/// clear-text strings looked up by the test do not appear as-is in this binary.
fn file_copy_on_write() {
    // Bad pattern ("RAmEtbQfVE" once decoded), must not be matched
    let bad = b"]NbJ{m^iYJ";
    // Good pattern ("ste3Cd9Te8" once decoded), must be matched
    let good = b"|{j<Lk6[j7";

    // Create a file, and write "RAmEtbQfVE" in it
    let mut contents = vec![0; 4 * 4096];
    xor_bytes_into(bad, 15, &mut contents[2048..(2048 + bad.len())]);
    // Map at offset 500
    let mut region1 = Region::copy_on_write(contents, 500);
    // Overwrite what is written in it to write "ste3Cd9Te8". The mapping
    // starts 500 bytes into the file, hence the shifted offset.
    region1.write_at(2048 - 500, good);

    // New file, with:
    // - the good pattern at 1000
    // - the bad pattern at 4096 - 5 (between two pages)
    let mut contents = vec![0; 2 * 4096];
    xor_bytes_into(good, 15, &mut contents[1000..(1000 + good.len())]);
    // The destination must span exactly the pattern: 4091..4101, not 4091..5001.
    xor_bytes_into(bad, 15, &mut contents[4091..(4091 + bad.len())]);
    let mut region2 = Region::copy_on_write(contents, 0);
    // Replace the bad pattern straddling the two pages by the good one.
    region2.write_at(4091, good);

    // Send the base addresses of the regions back to the test
    println!("{:x}", region1.addr());
    println!("{:x}", region2.addr());

    println!("ready");
    std::thread::sleep(std::time::Duration::from_secs(500));
}

impl Region {
fn new(contents: &[u8]) -> Self {
let mut this = Self::zeroed(contents.len());
@@ -116,6 +153,26 @@ impl Region {
Self { _file: file, map }
}

/// Builds a region from a private (`map_copy`) mapping of a fresh temporary
/// file filled with `contents`, starting `offset` bytes into that file.
///
/// Panics if the temporary file cannot be created, written or mapped.
fn copy_on_write(mut contents: Vec<u8>, offset: u64) -> Self {
    let mut file = tempfile::NamedTempFile::new().unwrap();
    file.write_all(&contents).unwrap();

    // Erase contents to not let it live in our RAM.
    contents.fill(0);
    drop(contents);

    let mut options = memmap2::MmapOptions::new();
    options.offset(offset);
    // NOTE(review): mapping a file is unsafe because the underlying file may
    // be modified while mapped. Here the file is a temporary one created just
    // above and kept alive in `_file` alongside the map.
    let map = unsafe { options.map_copy(file.as_file()) }.unwrap();

    Self { _file: file, map }
}

/// Writes `payload` into the mapping starting at `offset`, going through
/// `xor_bytes_into` with the key 15.
///
/// Panics if `offset + payload.len()` is past the end of the mapping.
fn write_at(&mut self, offset: usize, payload: &[u8]) {
    let end = offset + payload.len();
    let destination = &mut self.map[offset..end];
    xor_bytes_into(payload, 15, destination);
}
3 changes: 3 additions & 0 deletions boreal/Cargo.toml
Original file line number Diff line number Diff line change
@@ -68,6 +68,9 @@ authenticode-parser = { version = "0.3", optional = true }
# "memmap" feature
memmap2 = { version = "0.9", optional = true }

[target.'cfg(target_os = "linux")'.dependencies]
libc = "0.2"

[target.'cfg(windows)'.dependencies]
windows = { version = "0.48", features = [
"Win32_Foundation",
11 changes: 1 addition & 10 deletions boreal/src/memory.rs
Original file line number Diff line number Diff line change
@@ -56,15 +56,6 @@ impl Memory<'_> {
}
}

/// True if all the memory is readily available.
#[must_use]
pub fn is_direct(&self) -> bool {
match self {
Self::Direct(_) => true,
Self::Fragmented { .. } => false,
}
}

/// Returns the byte slice of the whole scanned memory if available.
#[must_use]
pub fn get_direct(&self) -> Option<&[u8]> {
@@ -154,7 +145,7 @@ pub trait FragmentedMemory: Send + Sync + std::fmt::Debug {
}

/// A description of a region of memory to scan.
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct RegionDescription {
/// Index of the start of the region.
pub start: usize,
2 changes: 1 addition & 1 deletion boreal/src/scanner/mod.rs
Original file line number Diff line number Diff line change
@@ -357,7 +357,7 @@ impl Inner {
scan_data.params.process_memory,
);

if !scan_data.params.compute_full_matches && scan_data.mem.is_direct() {
if !scan_data.params.compute_full_matches {
#[cfg(feature = "profiling")]
let start = std::time::Instant::now();

6 changes: 6 additions & 0 deletions boreal/src/scanner/params.rs
Original file line number Diff line number Diff line change
@@ -152,6 +152,9 @@ impl ScanParams {
/// tweaking the [`ScanParams::memory_chunk_size`] parameter, to bound
/// memory consumption while still ensuring every byte is scanned.
///
/// Please note that this value may be adjusted to ensure it is a
/// multiple of the page size.
///
/// By default, this parameter is set to 1GB.
#[must_use]
pub fn max_fetched_region_size(mut self, max_fetched_region_size: usize) -> Self {
@@ -188,6 +191,9 @@ impl ScanParams {
/// `bor`, and the next one starts with `eal`, the string will
/// **not** match.
///
/// Please note that, if set, this value may be adjusted to ensure it
/// is a multiple of the page size.
///
/// By default, this parameter is unset.
#[must_use]
pub fn memory_chunk_size(mut self, memory_chunk_size: Option<usize>) -> Self {
527 changes: 483 additions & 44 deletions boreal/src/scanner/process/sys/linux.rs

Large diffs are not rendered by default.

55 changes: 27 additions & 28 deletions boreal/src/scanner/process/sys/windows.rs
Original file line number Diff line number Diff line change
@@ -41,8 +41,7 @@ pub fn process_memory(pid: u32) -> Result<Box<dyn FragmentedMemory>, ScanError>
Ok(Box::new(WindowsProcessMemory {
handle,
buffer: Vec::new(),
current_position: None,
region: None,
current_region: None,
}))
}

@@ -54,22 +53,21 @@ struct WindowsProcessMemory {
// Buffer used to hold the duplicated process memory when fetched.
buffer: Vec<u8>,

// Current position: current region and offset in the region of the current chunk.
current_position: Option<(RegionDescription, usize)>,

// Current region returned by the next call, which needs to be fetched.
region: Option<RegionDescription>,
// Current region being listed.
current_region: Option<RegionDescription>,
}

impl WindowsProcessMemory {
fn next_position(&self, params: &MemoryParams) -> Option<(RegionDescription, usize)> {
let next_addr = match self.current_position {
Some((desc, mut offset)) => {
fn next_region(&self, params: &MemoryParams) -> Option<RegionDescription> {
let next_addr = match self.current_region {
Some(desc) => {
if let Some(chunk_size) = params.memory_chunk_size {
offset = offset.saturating_add(chunk_size);
if offset < desc.length {
if chunk_size < desc.length {
// Region has a next chunk, so simply select it.
return Some((desc, offset));
return Some(RegionDescription {
start: desc.start.saturating_add(chunk_size),
length: desc.length.saturating_sub(chunk_size),
});
}
}

@@ -78,7 +76,7 @@ impl WindowsProcessMemory {
None => 0,
};

query_next_region(self.handle.as_handle(), next_addr).map(|desc| (desc, 0))
query_next_region(self.handle.as_handle(), next_addr)
}
}

@@ -119,26 +117,17 @@ fn query_next_region(handle: BorrowedHandle, mut next_addr: usize) -> Option<Reg

impl FragmentedMemory for WindowsProcessMemory {
fn reset(&mut self) {
self.region = None;
self.current_region = None;
}

fn next(&mut self, params: &MemoryParams) -> Option<RegionDescription> {
self.current_position = self.next_position(params);

self.region = self
.current_position
.map(|(desc, offset)| match params.memory_chunk_size {
Some(chunk_size) => RegionDescription {
start: desc.start.saturating_add(offset),
length: std::cmp::min(chunk_size, desc.length),
},
None => desc,
});
self.region
self.current_region = self.next_region(params);
self.current_region
.map(|region| get_chunked_region(region, params))
}

fn fetch(&mut self, params: &MemoryParams) -> Option<Region> {
let desc = self.region?;
let desc = get_chunked_region(self.current_region?, params);

self.buffer.resize(
std::cmp::min(desc.length, params.max_fetched_region_size),
@@ -172,6 +161,16 @@ impl FragmentedMemory for WindowsProcessMemory {
}
}

/// Restricts a region description to at most one chunk.
///
/// When `params.memory_chunk_size` is set, the result keeps the start of
/// `desc` and caps its length to the chunk size. When it is unset, `desc` is
/// returned unchanged.
fn get_chunked_region(desc: RegionDescription, params: &MemoryParams) -> RegionDescription {
    if let Some(chunk_size) = params.memory_chunk_size {
        RegionDescription {
            start: desc.start,
            length: desc.length.min(chunk_size),
        }
    } else {
        desc
    }
}

/// Wraps the raw handle behind `handle` into a `HANDLE` value.
fn handle_to_windows_handle(handle: BorrowedHandle) -> HANDLE {
    let raw = handle.as_raw_handle();
    HANDLE(raw as _)
}
123 changes: 102 additions & 21 deletions boreal/tests/it/process.rs
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@ use crate::utils::Checker;
#[cfg(any(target_os = "linux", windows))]
use boreal::scanner::ScanError;

// Unit (4 MiB) in which the scan parameters and the expected match offsets of
// the process tests below are expressed.
const PAGE_SIZE: usize = 4 * 1024 * 1024;

#[test]
#[cfg(any(target_os = "linux", windows))]
fn test_scan_process() {
@@ -142,7 +144,7 @@ rule a {
}"#,
);
let mut scanner = checker.scanner().scanner;
scanner.set_scan_params(ScanParams::default().max_fetched_region_size(20));
scanner.set_scan_params(ScanParams::default().max_fetched_region_size(PAGE_SIZE));

let helper = BinHelper::run("max_fetched_region_size");
assert_eq!(helper.output.len(), 4);
@@ -155,21 +157,21 @@ rule a {
let res = get_boreal_full_matches(&res);
let mut expected = vec![
(b"Dwb6r5gd".as_slice(), region1, 8),
(b"Dwb6r5gd".as_slice(), region2 + 10, 8),
(b"Dwb6r5gd".as_slice(), region2 + PAGE_SIZE - 8, 8),
];
// Sort by address, since the provided regions might not be in the same order as creation.
expected.sort_by_key(|v| v.1);

assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]);

scanner.set_scan_params(ScanParams::default().max_fetched_region_size(40));
scanner.set_scan_params(ScanParams::default().max_fetched_region_size(PAGE_SIZE * 2));
let res = scanner.scan_process(helper.pid()).unwrap();
let res = get_boreal_full_matches(&res);
let mut expected = vec![
(b"Dwb6r5gd".as_slice(), region1, 8),
(b"Dwb6r5gd".as_slice(), region2 + 10, 8),
(b"Dwb6r5gd".as_slice(), region3 + 16, 8),
(b"Dwb6r5gd".as_slice(), region4 + 26, 8),
(b"Dwb6r5gd".as_slice(), region2 + PAGE_SIZE - 8, 8),
(b"Dwb6r5gd".as_slice(), region3 + PAGE_SIZE - 4, 8),
(b"Dwb6r5gd".as_slice(), region4 + PAGE_SIZE + 200, 8),
];
// Sort by address, since the provided regions might not be in the same order as creation.
expected.sort_by_key(|v| v.1);
@@ -194,48 +196,127 @@ rule a {
}"#,
);
let mut scanner = checker.scanner().scanner;
let tenmb = 10 * 1024 * 1024;
scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(tenmb)));
scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(2 * PAGE_SIZE)));

let helper = BinHelper::run("memory_chunk_size");
assert_eq!(helper.output.len(), 3);
dbg!(&helper.output);
let region1 = usize::from_str_radix(&helper.output[0], 16).unwrap();
let region2 = usize::from_str_radix(&helper.output[1], 16).unwrap();
let region3 = usize::from_str_radix(&helper.output[2], 16).unwrap();

let res = scanner.scan_process(helper.pid()).unwrap();
let res = get_boreal_full_matches(&res);
let tenmb = 10 * 1024 * 1024;
let mut expected = vec![
(b"T5aI0uhg7S".as_slice(), region1 + (tenmb - 10), 10),
(
b"T5aI0uhg7S".as_slice(),
region3 + tenmb + 5 * 1024 * 1024 - 5,
10,
),
(b"T5aI0uhg7S".as_slice(), region3 + 2 * tenmb + 4096, 10),
(b"T5aI0uhg7S".as_slice(), region1 + (PAGE_SIZE - 10), 10),
(b"T5aI0uhg7S".as_slice(), region3 + 3 * PAGE_SIZE - 5, 10),
(b"T5aI0uhg7S".as_slice(), region3 + 4 * PAGE_SIZE + 4096, 10),
];
// Sort by address, since the provided regions might not be in the same order as creation.
expected.sort_by_key(|v| v.1);
assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]);

scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(15 * 1024 * 1024)));
scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(3 * PAGE_SIZE)));
let res = scanner.scan_process(helper.pid()).unwrap();
let res = get_boreal_full_matches(&res);
let mut expected = vec![
(b"T5aI0uhg7S".as_slice(), region1 + (tenmb - 10), 10),
(b"T5aI0uhg7S".as_slice(), region1 + (PAGE_SIZE - 10), 10),
// We now see the one in region2
(b"T5aI0uhg7S".as_slice(), region2 + tenmb - 5, 10),
(b"T5aI0uhg7S".as_slice(), region2 + 2 * PAGE_SIZE - 5, 10),
// But no longer see the first one in region3
(b"T5aI0uhg7S".as_slice(), region3 + 2 * tenmb + 4096, 10),
(b"T5aI0uhg7S".as_slice(), region3 + 4 * PAGE_SIZE + 4096, 10),
];
// Sort by address, since the provided regions might not be in the same order as creation.
expected.sort_by_key(|v| v.1);

assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]);
}

// Check that scanning a process reports what is in its private copy of a
// mapped file, and not what is stored in the file itself.
#[test]
#[cfg(any(target_os = "linux", windows))]
fn test_process_file_copy_on_write() {
    let mut checker = Checker::new(
        r#"
rule a {
strings:
// String written in the file
$a = "RAmEtbQfVE"
condition:
$a
}
rule b {
strings:
// String written over the file contents in the private copy
// of the process
$b = "ste3Cd9Te8"
condition:
$b
}"#,
    );

    let helper = BinHelper::run("file_copy_on_write");
    assert_eq!(helper.output.len(), 2);
    // The helper prints the base address of each region, in hexadecimal.
    let addresses: Vec<usize> = helper
        .output
        .iter()
        .map(|line| usize::from_str_radix(line, 16).unwrap())
        .collect();
    let (region1, region2) = (addresses[0], addresses[1]);

    // Only rule b is expected: every match is on the overwritten string.
    let needle = b"ste3Cd9Te8".as_slice();
    let mut expected = vec![
        (needle, region1 + 2048 - 500, 10),
        (needle, region2 + 1000, 10),
        (needle, region2 + 4096 - 5, 10),
    ];
    expected.sort_by_key(|&(_, address, _)| address);

    let full_matches = vec![("default:b".to_owned(), vec![("b", expected)])];
    checker.check_process_full_matches(helper.pid(), full_matches);
}

// Scanning a process must not noticeably grow the RAM used by this process.
// Avoiding this growth is the purpose of the pagemap optimization on linux,
// so the test only exists on linux.
#[test]
#[cfg(target_os = "linux")]
fn test_process_scan_ram_increase() {
    // Reads the `VmRSS` entry of `/proc/<pid>/status` and returns it in bytes.
    fn get_vm_rss(pid: u32) -> u64 {
        let status = std::fs::read_to_string(format!("/proc/{}/status", pid)).unwrap();
        let kilobytes: u64 = status
            .split('\n')
            .find(|line| line.starts_with("VmRSS"))
            .and_then(|line| line.split_ascii_whitespace().nth(1))
            .unwrap()
            .parse()
            .unwrap();
        // Value is in kB
        kilobytes * 1024
    }

    let mut checker = Checker::new(
        r#"
rule a {
strings:
$a = "PAYLOAD_ON_STACK"
condition:
all of them
}"#,
    );

    let helper = BinHelper::run("stack");
    let pid = helper.pid();

    let vm_rss_before = get_vm_rss(pid);
    checker.check_process(pid, true);
    let vm_rss_after = get_vm_rss(pid);

    // The RSS after the scan must stay "close" to the RSS before it, ie, grow
    // by less than 10%. This fails if just reading all of /proc/pid/mem.
    let increase = vm_rss_after.saturating_sub(vm_rss_before);
    let max_increase = vm_rss_before / 10;
    assert!(
        increase < max_increase,
        "rss before: {}, rss after: {}, increase: {:.2}%",
        vm_rss_before,
        vm_rss_after,
        (increase as f64) * 100. / (vm_rss_before as f64)
    );
}

struct BinHelper {
proc: std::process::Child,
output: Vec<String>,
17 changes: 17 additions & 0 deletions boreal/tests/it/utils.rs
Original file line number Diff line number Diff line change
@@ -319,6 +319,23 @@ impl Checker {
}
}

/// Scans the process `pid` and asserts that the full matches are `expected`.
///
/// The check is done with boreal, and also with the yara rules when
/// `self.yara_rules` is set.
#[track_caller]
pub fn check_process_full_matches(&mut self, pid: u32, expected: FullMatches) {
    // Full matches must be computed for this check, so scan with a copy of
    // the scanner that has this parameter enabled.
    {
        let mut scanner = self.scanner.clone();
        let params = scanner.scan_params().clone().compute_full_matches(true);
        scanner.set_scan_params(params);

        let results = scanner.scan_process(pid).unwrap();
        let matches = get_boreal_full_matches(&results);
        assert_eq!(matches, expected, "test failed for boreal");
    }

    if let Some(rules) = &self.yara_rules {
        let yara_results = rules.scan_process(pid, 1).unwrap();
        check_yara_full_matches(&yara_results, expected);
    }
}

#[track_caller]
pub fn check_boreal(&mut self, mem: &[u8], expected_res: bool) {
let res = self.scan_mem(mem);