Skip to content

Commit

Permalink
Merge branch 'VirusTotal:main' into macho-symhash
Browse files Browse the repository at this point in the history
  • Loading branch information
latonis authored Nov 21, 2024
2 parents fdfdf4b + c8dbcb8 commit aa84289
Show file tree
Hide file tree
Showing 46 changed files with 1,652 additions and 485 deletions.
4 changes: 2 additions & 2 deletions capi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ homepage.workspace = true

[features]
# The `capi` feature is required by `cargo-c`.
default = ["capi"]
default = ["capi", "rules-profiling"]
capi = []

# When enabled, the serialization of compiled rules include native code for
Expand All @@ -29,7 +29,7 @@ native-code-serialization = ["yara-x/native-code-serialization"]

# Enables rules profiling.
#
# This feature is disabled by default.
# This feature is enabled by default.
rules-profiling = ["yara-x/rules-profiling"]


Expand Down
34 changes: 34 additions & 0 deletions capi/include/yara_x.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
// errors instead of warnings.
#define YRX_ERROR_ON_SLOW_LOOP 8

// Flag passed to [`yrx_compiler_create`] for enabling hoisting. This
// is a compiler optimization that moves invariant expressions out of
// loops, improving performance during the evaluation of rule conditions
// that contain loops.
#define YRX_ENABLE_HOISTING 16

// Types of metadata values.
typedef enum YRX_METADATA_TYPE {
I64,
Expand Down Expand Up @@ -220,6 +226,23 @@ typedef void (*YRX_RULE_CALLBACK)(const struct YRX_RULE *rule,
typedef void (*YRX_IMPORT_CALLBACK)(const char *module_name,
void *user_data);

// Callback function passed to [`yrx_scanner_iter_most_expensive_rules`].
//
// The callback function receives pointers to the namespace and rule name,
// and two double-precision numbers with the time, in seconds, that the rule
// spent matching patterns and executing its condition. The pointers are valid
// only while the callback is executing; they are freed after it returns.
//
// The callback also receives a `user_data` pointer that can point to arbitrary
// data owned by the user.
//
// Requires the `rules-profiling` feature.
typedef void (*YRX_MOST_EXPENSIVE_RULES_CALLBACK)(const char *namespace,
const char *rule,
double pattern_matching_time,
double condition_exec_time,
void *user_data);

// Returns the error message for the most recent function in this API
// invoked by the current thread.
//
Expand Down Expand Up @@ -682,4 +705,15 @@ enum YRX_RESULT yrx_scanner_set_global_float(struct YRX_SCANNER *scanner,
const char *ident,
double value);

// Iterates over the top N most expensive rules, calling the callback for
// each rule.
//
// Requires the `rules-profiling` feature.
//
// See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
enum YRX_RESULT yrx_scanner_iter_most_expensive_rules(struct YRX_SCANNER *scanner,
size_t n,
YRX_MOST_EXPENSIVE_RULES_CALLBACK callback,
void *user_data);

#endif /* YARA_X */
9 changes: 9 additions & 0 deletions capi/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,20 @@ pub const YRX_ERROR_ON_SLOW_PATTERN: u32 = 4;
/// errors instead of warnings.
pub const YRX_ERROR_ON_SLOW_LOOP: u32 = 8;

/// Flag passed to [`yrx_compiler_create`] for enabling hoisting. This
/// is a compiler optimization that moves invariant expressions out of
/// loops, improving performance during the evaluation of rule conditions
/// that contain loops.
pub const YRX_ENABLE_HOISTING: u32 = 16;

fn _yrx_compiler_create<'a>(flags: u32) -> yara_x::Compiler<'a> {
let mut compiler = yara_x::Compiler::new();
if flags & YRX_RELAXED_RE_SYNTAX != 0 {
compiler.relaxed_re_syntax(true);
}
if flags & YRX_ENABLE_HOISTING != 0 {
compiler.hoisting(true);
}
if flags & YRX_COLORIZE_ERRORS != 0 {
compiler.colorize_errors(true);
}
Expand Down
59 changes: 58 additions & 1 deletion capi/src/scanner.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::ffi::{c_char, CStr};
#[cfg(feature = "rules-profiling")]
use std::ffi::CString;
use std::ffi::{c_char, c_void, CStr};
use std::slice;
use std::time::Duration;

Expand Down Expand Up @@ -302,3 +304,58 @@ unsafe fn slice_from_ptr_and_len<'a>(
};
Some(data)
}

/// Signature of the callback accepted by
/// [`yrx_scanner_iter_most_expensive_rules`].
///
/// The callback is invoked once per reported rule with:
///
/// * `namespace` and `rule`: null-terminated strings with the rule's
///   namespace and name. Both pointers are valid only while the callback is
///   executing; the strings are freed as soon as it returns, so copy them if
///   they must outlive the call.
/// * `pattern_matching_time` and `condition_exec_time`: the time, in
///   seconds, that the rule spent matching patterns and evaluating its
///   condition, respectively.
/// * `user_data`: the pointer supplied by the caller of the iteration
///   function, passed through untouched. It can point to arbitrary data
///   owned by the user.
///
/// Requires the `rules-profiling` feature.
pub type YRX_MOST_EXPENSIVE_RULES_CALLBACK = extern "C" fn(
    namespace: *const c_char,
    rule: *const c_char,
    pattern_matching_time: f64,
    condition_exec_time: f64,
    user_data: *mut c_void,
);

/// Iterates over the top N most expensive rules, calling the callback for
/// each rule.
///
/// Returns [`YRX_RESULT::INVALID_ARGUMENT`] if `scanner` is null, and
/// [`YRX_RESULT::SUCCESS`] after the callback has been invoked for every
/// reported rule.
///
/// Requires the `rules-profiling` feature.
///
/// See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
///
/// # Safety
///
/// A non-null `scanner` is dereferenced, so it must point to a valid
/// [`YRX_SCANNER`]. `user_data` is never dereferenced by this function; it
/// is handed to `callback` untouched.
///
/// # Panics
///
/// Panics if a namespace or rule name contains an interior NUL byte, as such
/// a string cannot be converted into a C string.
#[cfg(feature = "rules-profiling")]
#[no_mangle]
pub unsafe extern "C" fn yrx_scanner_iter_most_expensive_rules(
    scanner: *mut YRX_SCANNER,
    n: usize,
    callback: YRX_MOST_EXPENSIVE_RULES_CALLBACK,
    user_data: *mut c_void,
) -> YRX_RESULT {
    // `as_ref` yields `None` for a null pointer, so the null check and the
    // dereference happen in a single step with no `unwrap`.
    let scanner = match scanner.as_ref() {
        Some(scanner) => scanner,
        None => return YRX_RESULT::INVALID_ARGUMENT,
    };

    for profiling_info in scanner.inner.most_expensive_rules(n) {
        // These C strings live until the end of the current iteration, which
        // is why the callback must not keep the pointers after returning.
        let namespace = CString::new(profiling_info.namespace)
            .expect("namespace should not contain interior NUL bytes");
        let rule = CString::new(profiling_info.rule)
            .expect("rule name should not contain interior NUL bytes");

        callback(
            namespace.as_ptr(),
            rule.as_ptr(),
            profiling_info.pattern_matching_time.as_secs_f64(),
            profiling_info.condition_exec_time.as_secs_f64(),
            user_data,
        );
    }

    YRX_RESULT::SUCCESS
}
16 changes: 16 additions & 0 deletions go/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ func RelaxedReSyntax(yes bool) CompileOption {
}
}

// Hoisting is an option for [NewCompiler] and [Compile] that turns on the
// hoisting optimization. When enabled, the compiler moves invariant
// expressions out of loops, improving performance during the evaluation of
// rule conditions that contain loops.
func Hoisting(yes bool) CompileOption {
	return func(compiler *Compiler) error {
		// Only record the choice here; the option never fails.
		compiler.hoisting = yes
		return nil
	}
}

// ErrorOnSlowPattern is an option for [NewCompiler] and [Compile] that
// tells the compiler to treat slow patterns as errors instead of warnings.
func ErrorOnSlowPattern(yes bool) CompileOption {
Expand Down Expand Up @@ -228,6 +239,7 @@ type bannedModule struct {
type Compiler struct {
cCompiler *C.YRX_COMPILER
relaxedReSyntax bool
hoisting bool
errorOnSlowPattern bool
errorOnSlowLoop bool
ignoredModules map[string]bool
Expand Down Expand Up @@ -256,6 +268,10 @@ func NewCompiler(opts ...CompileOption) (*Compiler, error) {
flags |= C.YRX_RELAXED_RE_SYNTAX
}

if c.hoisting {
flags |= C.YRX_ENABLE_HOISTING
}

if c.errorOnSlowPattern {
flags |= C.YRX_ERROR_ON_SLOW_PATTERN
}
Expand Down
7 changes: 7 additions & 0 deletions go/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ func TestRelaxedReSyntax(t *testing.T) {
assert.Len(t, scanResults.MatchingRules(), 1)
}

// TestHoisting checks that a trivial rule compiles without errors when the
// hoisting optimization is enabled.
func TestHoisting(t *testing.T) {
	hoistingEnabled := Hoisting(true)
	_, compileErr := Compile(`
rule test { condition: true }`, hoistingEnabled)
	assert.NoError(t, compileErr)
}

func TestErrorOnSlowPattern(t *testing.T) {
_, err := Compile(`
rule test { strings: $a = /a.*/ condition: $a }`,
Expand Down
57 changes: 56 additions & 1 deletion go/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,17 @@ package yara_x
// return yrx_scanner_on_matching_rule(scanner, callback, (void*) user_data);
// }
//
// void onMatchingRule(YRX_RULE*, uintptr_t);
// enum YRX_RESULT static inline _yrx_scanner_iter_most_expensive_rules(
// struct YRX_SCANNER *scanner,
// size_t n,
// YRX_MOST_EXPENSIVE_RULES_CALLBACK callback,
// uintptr_t most_expensive_rules_handle)
// {
// return yrx_scanner_iter_most_expensive_rules(scanner, n, callback, (void*) most_expensive_rules_handle);
// }
//
// extern void onMatchingRule(YRX_RULE*, uintptr_t);
// extern void mostExpensiveRulesCallback(char*, char*, double, double, uintptr_t);
import "C"

import (
Expand Down Expand Up @@ -237,6 +247,51 @@ func (s *Scanner) Scan(buf []byte) (*ScanResults, error) {
return scanResults, err
}

// ProfilingInfo contains the profiling information for a single rule, as
// returned by [Scanner.MostExpensiveRules].
type ProfilingInfo struct {
// Namespace is the namespace the rule belongs to.
Namespace string
// Rule is the name of the rule.
Rule string
// PatternMatchingTime is the time, in seconds, the rule spent matching
// patterns.
PatternMatchingTime float64
// ConditionExecTime is the time, in seconds, the rule spent executing its
// condition.
ConditionExecTime float64
}

// mostExpensiveRulesCallback is the callback invoked by
// yrx_scanner_iter_most_expensive_rules for each reported rule. It appends
// the rule's profiling information to the []ProfilingInfo slice referenced
// by the cgo handle received in `handle`.
//
//export mostExpensiveRulesCallback
func mostExpensiveRulesCallback(
namespace *C.char,
rule *C.char,
patternMatchingTime C.double,
condExecTime C.double,
handle C.uintptr_t) {
// Recover the Go value behind the handle that travelled through C as an
// opaque integer.
h := cgo.Handle(handle)
profilingInfo, ok := h.Value().(*[]ProfilingInfo)
if !ok {
panic("mostExpensiveRulesCallback didn't receive a *[]ProfilingInfo")
}
// C.GoString copies the C strings into Go memory, so the appended entry
// does not keep the `namespace` and `rule` pointers after this callback
// returns.
*profilingInfo = append(*profilingInfo, ProfilingInfo{
Namespace: C.GoString(namespace),
Rule: C.GoString(rule),
PatternMatchingTime: float64(patternMatchingTime),
ConditionExecTime: float64(condExecTime),
})
}

// MostExpensiveRules returns profiling information for the top n most
// expensive rules: for each one, its namespace, its name, and the time in
// seconds it spent matching patterns and executing its condition.
//
// The underlying C function requires the YARA-X C library to be built with
// the `rules-profiling` feature. This method panics if that function does
// not report success.
func (s *Scanner) MostExpensiveRules(n int) []ProfilingInfo {
	rules := make([]ProfilingInfo, 0)

	// The slice is handed to C as an opaque handle; the exported callback
	// resolves the handle and appends one entry per rule.
	handle := cgo.NewHandle(&rules)
	defer handle.Delete()

	result := C._yrx_scanner_iter_most_expensive_rules(
		s.cScanner,
		C.size_t(n),
		C.YRX_MOST_EXPENSIVE_RULES_CALLBACK(C.mostExpensiveRulesCallback),
		C.uintptr_t(handle))

	if result != C.SUCCESS {
		panic("yrx_scanner_iter_most_expensive_rules failed")
	}

	return rules
}

// Destroy destroys the scanner.
//
// Calling this method directly is not necessary, it will be invoked by the
Expand Down
12 changes: 12 additions & 0 deletions go/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ func TestScannerTimeout(t *testing.T) {
assert.ErrorIs(t, err, ErrTimeout)
}

// TestScannerMostExpensiveRules checks that, after scanning data with a rule
// containing a slow regular expression, the scanner reports profiling
// information for that rule.
func TestScannerMostExpensiveRules(t *testing.T) {
	r, err := Compile("rule t { strings: $a = /a(.*)*a/ condition: $a }")
	// Stop early on a compilation failure instead of building a scanner
	// from rules that were never produced.
	if !assert.NoError(t, err) {
		return
	}
	s := NewScanner(r)
	_, err = s.Scan(bytes.Repeat([]byte("a"), 5000))
	assert.NoError(t, err)
	profilingInfo := s.MostExpensiveRules(1)
	// assert does not abort the test on failure, so guard the indexing
	// below; an empty result would otherwise cause an index-out-of-range
	// panic instead of a clean test failure.
	if !assert.NotEmpty(t, profilingInfo) {
		return
	}
	assert.Equal(t, "t", profilingInfo[0].Rule)
	assert.Equal(t, "default", profilingInfo[0].Namespace)
	assert.Greater(t, profilingInfo[0].PatternMatchingTime, float64(0))
	assert.Greater(t, profilingInfo[0].ConditionExecTime, float64(0))
}

func TestScannerMetadata(t *testing.T) {
r, _ := Compile(`rule t {
meta:
Expand Down
Loading

0 comments on commit aa84289

Please sign in to comment.