Skip to content

Commit

Permalink
feat: remove hex dependency
Browse files Browse the repository at this point in the history
Replace the dependency with a custom hex impl. Given how small the impl
is, and how it replaces a complex call with a quite efficient one, this
is a net gain.
  • Loading branch information
vthib committed May 10, 2024
1 parent 6b6ce72 commit bb46e49
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 14 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions boreal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ exclude = ["/tests"]
default = ["hash", "object", "memmap", "process"]

# Enables the "hash" module.
hash = ["dep:md-5", "dep:sha1", "dep:sha2", "dep:hex", "dep:crc32fast", "dep:tlsh2"]
hash = ["dep:md-5", "dep:sha1", "dep:sha2", "dep:crc32fast", "dep:tlsh2"]

# Enables the "pe", "elf" and "macho" modules.
#
Expand All @@ -32,7 +32,7 @@ cuckoo = ["dep:serde_json", "yara/module-cuckoo"]

# Enables the "pe.signatures" module field.
# The `object` feature must also be enabled to get access to the "pe" module.
authenticode = ["dep:authenticode-parser", "dep:hex"]
authenticode = ["dep:authenticode-parser"]

# Adds an API to scan files using memory maps.
memmap = ["dep:memmap2"]
Expand All @@ -59,7 +59,6 @@ regex-syntax = { version = "0.8", default-features = false }

# "hash" feature
crc32fast = { version = "1.4", optional = true }
hex = { version = "0.4", optional = true }
md-5 = { version = "0.10", optional = true }
sha1 = { version = "0.10", optional = true }
sha2 = { version = "0.10", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion boreal/src/module/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ impl Elf {

let hash = Md5::digest(import_string);

Some(Value::Bytes(hex::encode(hash).into_bytes()))
Some(Value::Bytes(super::hex_encode(hash)))
}

#[cfg(feature = "hash")]
Expand Down
6 changes: 3 additions & 3 deletions boreal/src/module/hash.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::sync::RwLock;

use super::{EvalContext, Module, ModuleData, ModuleDataMap, StaticValue, Type, Value};
use super::{hex_encode, EvalContext, Module, ModuleData, ModuleDataMap, StaticValue, Type, Value};
use md5::{Digest, Md5};
use sha1::Sha1;
use sha2::Sha256;
Expand Down Expand Up @@ -84,7 +84,7 @@ impl ModuleData for Hash {
}

fn compute_hash_from_bytes<D: Digest>(bytes: &[u8]) -> Value {
Value::bytes(hex::encode(D::digest(bytes)))
Value::Bytes(hex_encode(&D::digest(bytes)))
}

fn compute_hash_from_mem<D: Digest>(
Expand All @@ -95,7 +95,7 @@ fn compute_hash_from_mem<D: Digest>(
let mut digest = D::new();

ctx.mem.on_range(offset, end, |data| digest.update(data))?;
Some(Value::bytes(hex::encode(digest.finalize())))
Some(Value::Bytes(hex_encode(&digest.finalize())))
}

impl Hash {
Expand Down
20 changes: 20 additions & 0 deletions boreal/src/module/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,26 @@ where
}
}

/// Hex-encodes any value that can be viewed as a byte slice.
///
/// Returns whatever `hex_encode_inner` produces for the bytes exposed by
/// `v.as_ref()`.
fn hex_encode<T>(v: T) -> Vec<u8>
where
    T: AsRef<[u8]>,
{
    // Thin generic shim: borrow the bytes, then defer to the slice-based worker
    // (presumably so the actual encoding is not duplicated per input type).
    let bytes: &[u8] = v.as_ref();
    hex_encode_inner(bytes)
}

/// Encodes `v` as lowercase hexadecimal ASCII.
///
/// Every input byte yields two output bytes: the digit for its high nibble
/// followed by the digit for its low nibble, so `[0x1a]` becomes `b"1a"`.
/// An empty input yields an empty vector.
fn hex_encode_inner(v: &[u8]) -> Vec<u8> {
    const HEX_DIGITS: &[u8] = b"0123456789abcdef";

    // Maps one byte to its two hex digits, most significant nibble first.
    let to_digits = |byte: &u8| -> [u8; 2] {
        let high = usize::from(*byte >> 4);
        let low = usize::from(*byte & 0x0F);
        [HEX_DIGITS[high], HEX_DIGITS[low]]
    };

    // The `flat_map` over fixed-size arrays is kept on purpose: this shape was
    // found to give the best codegen, with:
    // - a single allocation for the vector with the right size
    // - no bounds checking
    v.iter().flat_map(to_digits).collect()
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion boreal/src/module/pe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2390,7 +2390,7 @@ impl Pe {
}
}

Some(Value::Bytes(hex::encode(hasher.finalize()).into_bytes()))
Some(Value::Bytes(super::hex_encode(hasher.finalize())))
}

fn rva_to_offset(ctx: &mut EvalContext, args: Vec<Value>) -> Option<Value> {
Expand Down
11 changes: 6 additions & 5 deletions boreal/src/module/pe/signatures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use authenticode_parser::{
use object::{pe, read::pe::DataDirectories};

use super::Value;
use crate::module::hex_encode;

pub fn get_signatures(
data_dirs: &DataDirectories,
Expand Down Expand Up @@ -37,9 +38,9 @@ fn process_authenticode(auth: &AuthenticodeArray) -> (Vec<Value>, bool) {
let verified = sig.verify_flags() == Some(AuthenticodeVerify::Valid);
is_signed = is_signed || verified;

let digest = sig.digest().map(hex::encode).map(Value::bytes);
let digest = sig.digest().map(hex_encode).map(Value::bytes);
let digest_alg = sig.digest_alg().map(Value::bytes);
let file_digest = sig.file_digest().map(hex::encode).map(Value::bytes);
let file_digest = sig.file_digest().map(hex_encode).map(Value::bytes);

// TODO on length_of_chain or other lengths, behavior is not aligned:
// yara does not save the length if the pointer is 0.
Expand Down Expand Up @@ -79,7 +80,7 @@ fn process_certs(certs: &[Certificate]) -> Vec<Value> {

fn signer_to_value(signer: &Signer) -> Value {
let program_name = signer.program_name().map(Value::bytes);
let digest = signer.digest().map(hex::encode).map(Value::bytes);
let digest = signer.digest().map(hex_encode).map(Value::bytes);
let digest_alg = signer.digest_alg().map(Value::bytes);
let chain = process_certs(signer.certificate_chain());

Expand All @@ -96,7 +97,7 @@ fn countersig_to_value(countersig: &Countersignature) -> Value {
let verified =
Value::Integer((countersig.verify_flags() == Some(CounterSignatureVerify::Valid)).into());
let sign_time = countersig.sign_time().into();
let digest = countersig.digest().map(hex::encode).map(Value::bytes);
let digest = countersig.digest().map(hex_encode).map(Value::bytes);
let digest_alg = countersig.digest_alg().map(Value::bytes);
let chain = process_certs(countersig.certificate_chain());

Expand All @@ -119,7 +120,7 @@ fn get_legacy_signer_data(sig: &Authenticode) -> HashMap<&'static str, Value> {
}

fn cert_to_map(cert: &Certificate, with_valid_on: bool) -> HashMap<&'static str, Value> {
let thumbprint_ascii = cert.sha1().map(hex::encode).map(Value::bytes);
let thumbprint_ascii = cert.sha1().map(hex_encode).map(Value::bytes);
let not_before = cert.not_before();
let not_after = cert.not_after();

Expand Down

0 comments on commit bb46e49

Please sign in to comment.