diff --git a/Cargo.lock b/Cargo.lock index bb811b26..85c7017c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,7 +148,6 @@ dependencies = [ name = "boreal-parser" version = "0.2.0" dependencies = [ - "bitflags", "codespan-reporting", "nom", ] diff --git a/boreal-parser/Cargo.toml b/boreal-parser/Cargo.toml index 7b8ab94c..051d5427 100644 --- a/boreal-parser/Cargo.toml +++ b/boreal-parser/Cargo.toml @@ -15,8 +15,5 @@ rust-version = "1.62" # Parsing library nom = "7.1" -# packing multiple flags together -bitflags = "1.3" - # Proper error reporting on parsing codespan-reporting = "0.11" diff --git a/boreal-parser/src/lib.rs b/boreal-parser/src/lib.rs index 08285df3..e35a8542 100644 --- a/boreal-parser/src/lib.rs +++ b/boreal-parser/src/lib.rs @@ -58,7 +58,8 @@ pub mod regex; pub use regex::Regex; mod rule; pub use rule::{ - Metadata, Rule, VariableDeclaration, VariableDeclarationValue, VariableFlags, VariableModifiers, + Metadata, Rule, VariableDeclaration, VariableDeclarationValue, VariableModifierBase64, + VariableModifiers, }; mod string; mod types; diff --git a/boreal-parser/src/rule.rs b/boreal-parser/src/rule.rs index 3adce26f..64c090b4 100644 --- a/boreal-parser/src/rule.rs +++ b/boreal-parser/src/rule.rs @@ -1,7 +1,6 @@ //! Parse yara rules. use std::ops::Range; -use bitflags::bitflags; use nom::{ branch::alt, character::complete::char, @@ -81,29 +80,6 @@ pub struct Metadata { pub value: MetadataValue, } -bitflags! { - /// Modifier flags, see [`VariableModifiers`]. 
- #[derive(Default)] - pub struct VariableFlags: u32 { - /// Wide modifier - const WIDE = 0b0000_0001; - /// Ascii modifier - const ASCII = 0b000_0010; - /// Nocase modifier - const NOCASE = 0b0000_0100; - /// Fullword modifier - const FULLWORD = 0b0000_1000; - /// Private modifier - const PRIVATE = 0b0001_0000; - /// Xor modifier, related to [`VariableModifiers::xor_range`] - const XOR = 0b0010_0000; - /// base64 modifier, related to [`VariableModifiers::base64_alphabet`] - const BASE64 = 0b0100_0000; - /// base64wide modifier, related to [`VariableModifiers::base64_alphabet`] - const BASE64WIDE = 0b1000_0000; - } -} - /// Value for a string associated with a rule. #[derive(Clone, Debug, PartialEq)] pub enum VariableDeclarationValue { @@ -117,18 +93,45 @@ pub enum VariableDeclarationValue { /// Modifiers applicable on a string. #[derive(Clone, Default, Debug, PartialEq, Eq)] +// Completely useless lint +#[allow(clippy::struct_excessive_bools)] pub struct VariableModifiers { - /// Bitflags of possibles flags modifying the string. - pub flags: VariableFlags, - /// Xor range. - /// - /// This is only applicable if `flags` contains [`VariableFlags::XOR`]. - pub xor_range: (u8, u8), - /// Base64 alphabet. + /// Wide modifier. + pub wide: bool, + + /// Ascii modifier. + pub ascii: bool, + + /// Nocase modifier. + pub nocase: bool, + + /// Fullword modifier. + pub fullword: bool, + + /// Private modifier. + pub private: bool, + + /// Xor modifier, providing the range. + pub xor: Option<(u8, u8)>, + + /// Base64 modifier. /// /// This is only applicable if `flags` contains [`VariableFlags::BASE64`] /// or [`VariableFlags::BASE64WIDE`]. - pub base64_alphabet: Option<[u8; 64]>, + pub base64: Option, +} + +/// Base64 variable modifier. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct VariableModifierBase64 { + /// Wide version. + pub wide: bool, + + /// Ascii version. + pub ascii: bool, + + /// Alphabet to use to deserialize, if provided. 
+ pub alphabet: Option<[u8; 64]>, } /// String declared in a rule. @@ -304,75 +307,100 @@ fn string_declaration(input: Input) -> ParseResult { /// A single parsed modifier #[derive(Clone, Debug, PartialEq)] enum Modifier { - // Must not use this enum value for the flags XOR and BASE64(WIDE). - // Instead, use the other enum values to ensure the associated data - // is properly set. - Flag(VariableFlags), + Wide, + Ascii, + Nocase, + Fullword, + Private, Xor(u8, u8), - Base64(Option<[u8; 64]>), - Base64Wide(Option<[u8; 64]>), + Base64 { + wide: bool, + alphabet: Option<[u8; 64]>, + }, +} + +fn modifiers_duplicated(modifier_name: &str, start: Position, input: Input) -> nom::Err { + nom::Err::Failure(Error::new( + input.get_span_from(start), + ErrorKind::ModifiersDuplicated { + modifier_name: modifier_name.to_string(), + }, + )) } fn accumulate_modifiers(parser: F, mut input: Input) -> ParseResult where F: Fn(Input) -> ParseResult, { - let add_flag = - |modifiers: &mut VariableModifiers, start: Position, input: Input, flag: VariableFlags| { - if modifiers.flags.contains(flag) { - return Err(nom::Err::Failure(Error::new( - input.get_span_from(start), - ErrorKind::ModifiersDuplicated { - modifier_name: format!("{flag:?}"), - }, - ))); - } - modifiers.flags |= flag; - Ok(()) - }; - let mut modifiers = VariableModifiers::default(); let start = input.pos(); let mut parser = opt(parser); while let (i, Some(modifier)) = parser(input)? 
{ match modifier { - Modifier::Flag(flag) => { - add_flag(&mut modifiers, input.pos(), i, flag)?; + Modifier::Wide => { + if modifiers.wide { + return Err(modifiers_duplicated("wide", input.pos(), i)); + } + modifiers.wide = true; } - Modifier::Xor(from, to) => { - add_flag(&mut modifiers, input.pos(), i, VariableFlags::XOR)?; - modifiers.xor_range = (from, to); + Modifier::Ascii => { + if modifiers.ascii { + return Err(modifiers_duplicated("ascii", input.pos(), i)); + } + modifiers.ascii = true; + } + Modifier::Nocase => { + if modifiers.nocase { + return Err(modifiers_duplicated("nocase", input.pos(), i)); + } + modifiers.nocase = true; + } + Modifier::Fullword => { + if modifiers.fullword { + return Err(modifiers_duplicated("fullword", input.pos(), i)); + } + modifiers.fullword = true; } - Modifier::Base64(alphabet) => { - if modifiers.flags.contains(VariableFlags::BASE64WIDE) - && modifiers.base64_alphabet != alphabet - { - return Err(nom::Err::Failure(Error::new( - i.get_span_from(input.pos()), - ErrorKind::Base64AlphabetIncompatible, - ))); + Modifier::Private => { + if modifiers.private { + return Err(modifiers_duplicated("private", input.pos(), i)); } - add_flag(&mut modifiers, input.pos(), i, VariableFlags::BASE64)?; - modifiers.base64_alphabet = alphabet; + modifiers.private = true; } - Modifier::Base64Wide(alphabet) => { - if modifiers.flags.contains(VariableFlags::BASE64) - && modifiers.base64_alphabet != alphabet - { - return Err(nom::Err::Failure(Error::new( - i.get_span_from(input.pos()), - ErrorKind::Base64AlphabetIncompatible, - ))); + Modifier::Xor(from, to) => { + if modifiers.xor.is_some() { + return Err(modifiers_duplicated("xor", input.pos(), i)); } - add_flag(&mut modifiers, input.pos(), i, VariableFlags::BASE64WIDE)?; - modifiers.base64_alphabet = alphabet; + modifiers.xor = Some((from, to)); } + Modifier::Base64 { wide, alphabet } => match &mut modifiers.base64 { + Some(base64) => { + if wide && std::mem::replace(&mut base64.wide, true) { + 
return Err(modifiers_duplicated("base64wide", input.pos(), i)); + } else if !wide && std::mem::replace(&mut base64.ascii, true) { + return Err(modifiers_duplicated("base64", input.pos(), i)); + } else if alphabet != base64.alphabet { + return Err(nom::Err::Failure(Error::new( + i.get_span_from(input.pos()), + ErrorKind::Base64AlphabetIncompatible, + ))); + } + base64.alphabet = alphabet; + } + None => { + modifiers.base64 = Some(VariableModifierBase64 { + ascii: !wide, + wide, + alphabet, + }); + } + }, } input = i; } - if let Err(kind) = validate_flags(modifiers.flags) { + if let Err(kind) = validate_modifiers(&modifiers) { return Err(nom::Err::Failure(Error::new( input.get_span_from(start), kind, @@ -382,52 +410,34 @@ where Ok((input, modifiers)) } -fn validate_flags(flags: VariableFlags) -> Result<(), ErrorKind> { - if flags.contains(VariableFlags::XOR) { - if flags.contains(VariableFlags::NOCASE) { +fn validate_modifiers(modifiers: &VariableModifiers) -> Result<(), ErrorKind> { + if modifiers.xor.is_some() { + if modifiers.nocase { return Err(ErrorKind::ModifiersIncompatible { first_modifier_name: "xor".to_owned(), second_modifier_name: "nocase".to_owned(), }); } - if flags.contains(VariableFlags::BASE64) { - return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64".to_owned(), - second_modifier_name: "xor".to_owned(), - }); - } - if flags.contains(VariableFlags::BASE64WIDE) { + if let Some(base64) = &modifiers.base64 { return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64wide".to_owned(), + first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(), second_modifier_name: "xor".to_owned(), }); } } - if flags.contains(VariableFlags::NOCASE) { - if flags.contains(VariableFlags::BASE64) { + if modifiers.nocase { + if let Some(base64) = &modifiers.base64 { return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64".to_owned(), - second_modifier_name: "nocase".to_owned(), - }); - } 
- if flags.contains(VariableFlags::BASE64WIDE) { - return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64wide".to_owned(), + first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(), second_modifier_name: "nocase".to_owned(), }); } } - if flags.contains(VariableFlags::FULLWORD) { - if flags.contains(VariableFlags::BASE64) { - return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64".to_owned(), - second_modifier_name: "fullword".to_owned(), - }); - } - if flags.contains(VariableFlags::BASE64WIDE) { + if modifiers.fullword { + if let Some(base64) = &modifiers.base64 { return Err(ErrorKind::ModifiersIncompatible { - first_modifier_name: "base64wide".to_owned(), + first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(), second_modifier_name: "fullword".to_owned(), }); } @@ -450,19 +460,11 @@ fn hex_string_modifiers(input: Input) -> ParseResult { fn string_modifier(input: Input) -> ParseResult { alt(( - map(rtrim(ttag("wide")), |_| Modifier::Flag(VariableFlags::WIDE)), - map(rtrim(ttag("ascii")), |_| { - Modifier::Flag(VariableFlags::ASCII) - }), - map(rtrim(ttag("nocase")), |_| { - Modifier::Flag(VariableFlags::NOCASE) - }), - map(rtrim(ttag("fullword")), |_| { - Modifier::Flag(VariableFlags::FULLWORD) - }), - map(rtrim(ttag("private")), |_| { - Modifier::Flag(VariableFlags::PRIVATE) - }), + map(rtrim(ttag("wide")), |_| Modifier::Wide), + map(rtrim(ttag("ascii")), |_| Modifier::Ascii), + map(rtrim(ttag("nocase")), |_| Modifier::Nocase), + map(rtrim(ttag("fullword")), |_| Modifier::Fullword), + map(rtrim(ttag("private")), |_| Modifier::Private), xor_modifier, base64_modifier, ))(input) @@ -470,20 +472,16 @@ fn string_modifier(input: Input) -> ParseResult { fn regex_modifier(input: Input) -> ParseResult { rtrim(alt(( - map(ttag("wide"), |_| Modifier::Flag(VariableFlags::WIDE)), - map(ttag("ascii"), |_| Modifier::Flag(VariableFlags::ASCII)), - map(ttag("nocase"), |_| 
Modifier::Flag(VariableFlags::NOCASE)), - map(ttag("fullword"), |_| { - Modifier::Flag(VariableFlags::FULLWORD) - }), - map(ttag("private"), |_| Modifier::Flag(VariableFlags::PRIVATE)), + map(ttag("wide"), |_| Modifier::Wide), + map(ttag("ascii"), |_| Modifier::Ascii), + map(ttag("nocase"), |_| Modifier::Nocase), + map(ttag("fullword"), |_| Modifier::Fullword), + map(ttag("private"), |_| Modifier::Private), )))(input) } fn hex_string_modifier(input: Input) -> ParseResult { - map(rtrim(ttag("private")), |_| { - Modifier::Flag(VariableFlags::PRIVATE) - })(input) + map(rtrim(ttag("private")), |_| Modifier::Private)(input) } /// Parse a XOR modifier, ie: @@ -522,7 +520,7 @@ fn xor_modifier(input: Input) -> ParseResult { /// - `'base64(wide)'` /// - `'base64(wide)' '(' string ')'` fn base64_modifier(input: Input) -> ParseResult { - let (input, is_wide) = rtrim(alt(( + let (input, wide) = rtrim(alt(( map(ttag("base64"), |_| false), map(ttag("base64wide"), |_| true), )))(input)?; @@ -547,14 +545,7 @@ fn base64_modifier(input: Input) -> ParseResult { input = input2; } - Ok(( - input, - if is_wide { - Modifier::Base64Wide(alphabet) - } else { - Modifier::Base64(alphabet) - }, - )) + Ok((input, Modifier::Base64 { wide, alphabet })) } fn number_to_u8(value: i64) -> Result { @@ -682,12 +673,13 @@ mod tests { "private wide ascii xor Xor", "Xor", VariableModifiers { - flags: VariableFlags::PRIVATE - | VariableFlags::WIDE - | VariableFlags::ASCII - | VariableFlags::XOR, - xor_range: (0, 255), - base64_alphabet: None, + wide: true, + ascii: true, + nocase: false, + fullword: false, + private: true, + xor: Some((0, 255)), + base64: None, }, ); parse( @@ -695,9 +687,13 @@ mod tests { "nocase fullword", "", VariableModifiers { - flags: VariableFlags::NOCASE | VariableFlags::FULLWORD, - xor_range: (0, 0), - base64_alphabet: None, + wide: false, + ascii: false, + nocase: true, + fullword: true, + private: false, + xor: None, + base64: None, }, ); parse( @@ -705,9 +701,17 @@ mod tests { 
"base64wide ascii", "", VariableModifiers { - flags: VariableFlags::BASE64WIDE | VariableFlags::ASCII, - xor_range: (0, 0), - base64_alphabet: None, + wide: false, + ascii: true, + nocase: false, + fullword: false, + private: false, + xor: None, + base64: Some(VariableModifierBase64 { + wide: true, + ascii: false, + alphabet: None, + }), }, ); @@ -716,9 +720,13 @@ mod tests { "xor ( 15 )", "", VariableModifiers { - flags: VariableFlags::XOR, - xor_range: (15, 15), - base64_alphabet: None, + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: false, + xor: Some((15, 15)), + base64: None, }, ); parse( @@ -726,9 +734,13 @@ mod tests { "xor (50 - 120) private", "", VariableModifiers { - flags: VariableFlags::XOR | VariableFlags::PRIVATE, - xor_range: (50, 120), - base64_alphabet: None, + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: true, + xor: Some((50, 120)), + base64: None, }, ); @@ -739,9 +751,17 @@ mod tests { &format!("base64( \"{alphabet}\" )"), "", VariableModifiers { - flags: VariableFlags::BASE64, - xor_range: (0, 0), - base64_alphabet: Some(alphabet_array), + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: false, + xor: None, + base64: Some(VariableModifierBase64 { + wide: false, + ascii: true, + alphabet: Some(alphabet_array), + }), }, ); parse( @@ -749,9 +769,17 @@ mod tests { &format!("base64wide ( \"{alphabet}\" ) private"), "", VariableModifiers { - flags: VariableFlags::BASE64WIDE | VariableFlags::PRIVATE, - xor_range: (0, 0), - base64_alphabet: Some(alphabet_array), + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: true, + xor: None, + base64: Some(VariableModifierBase64 { + wide: true, + ascii: false, + alphabet: Some(alphabet_array), + }), }, ); parse( @@ -759,9 +787,17 @@ mod tests { &format!("base64wide ( \"{alphabet}\" ) base64 (\"{alphabet}\")"), "", VariableModifiers { - flags: VariableFlags::BASE64WIDE | VariableFlags::BASE64, - 
xor_range: (0, 0), - base64_alphabet: Some(alphabet_array), + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: false, + xor: None, + base64: Some(VariableModifierBase64 { + wide: true, + ascii: true, + alphabet: Some(alphabet_array), + }), }, ); @@ -770,12 +806,13 @@ mod tests { "private wide ascii nocase fullword base64", "base64", VariableModifiers { - flags: VariableFlags::PRIVATE - | VariableFlags::WIDE - | VariableFlags::ASCII - | VariableFlags::NOCASE - | VariableFlags::FULLWORD, - ..VariableModifiers::default() + wide: true, + ascii: true, + nocase: true, + fullword: true, + private: true, + xor: None, + base64: None, }, ); @@ -784,8 +821,13 @@ mod tests { "private wide", "wide", VariableModifiers { - flags: VariableFlags::PRIVATE, - ..VariableModifiers::default() + wide: false, + ascii: false, + nocase: false, + fullword: false, + private: true, + xor: None, + base64: None, }, ); @@ -873,8 +915,8 @@ mod tests { name: "a".to_owned(), value: VariableDeclarationValue::Bytes(b"b\td".to_vec()), modifiers: VariableModifiers { - flags: VariableFlags::XOR | VariableFlags::ASCII, - xor_range: (0, 255), + ascii: true, + xor: Some((0, 255)), ..VariableModifiers::default() }, span: 10..30, @@ -895,7 +937,6 @@ mod tests { span: 38..43, }), modifiers: VariableModifiers { - flags: VariableFlags::empty(), ..VariableModifiers::default() }, span: 34..43, @@ -907,7 +948,7 @@ mod tests { Mask::Left, )]), modifiers: VariableModifiers { - flags: VariableFlags::PRIVATE, + private: true, ..VariableModifiers::default() }, span: 45..61, @@ -955,10 +996,7 @@ mod tests { VariableDeclaration { name: "b".to_owned(), value: VariableDeclarationValue::Bytes(b"t".to_vec()), - modifiers: VariableModifiers { - flags: VariableFlags::empty(), - ..VariableModifiers::default() - }, + modifiers: VariableModifiers::default(), span: 60..68, } ], @@ -1093,24 +1131,41 @@ mod tests { let alphabet = "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu"; let 
alphabet_array: [u8; 64] = alphabet.as_bytes().try_into().unwrap(); - parse(base64_modifier, "base64 a", "a", Modifier::Base64(None)); + parse( + base64_modifier, + "base64 a", + "a", + Modifier::Base64 { + wide: false, + alphabet: None, + }, + ); parse( base64_modifier, "base64wide a", "a", - Modifier::Base64Wide(None), + Modifier::Base64 { + wide: true, + alphabet: None, + }, ); parse( base64_modifier, &format!(r#"base64("{alphabet}")"#), "", - Modifier::Base64(Some(alphabet_array)), + Modifier::Base64 { + wide: false, + alphabet: Some(alphabet_array), + }, ); parse( base64_modifier, &format!(r#"base64wide ( "{alphabet}")b"#), "b", - Modifier::Base64Wide(Some(alphabet_array)), + Modifier::Base64 { + wide: true, + alphabet: Some(alphabet_array), + }, ); parse_err(base64_modifier, ""); diff --git a/boreal/src/compiler/variable.rs b/boreal/src/compiler/variable.rs index 98ad3733..71a0adfa 100644 --- a/boreal/src/compiler/variable.rs +++ b/boreal/src/compiler/variable.rs @@ -1,7 +1,7 @@ use std::ops::Range; +use boreal_parser::VariableModifiers; use boreal_parser::{VariableDeclaration, VariableDeclarationValue}; -use boreal_parser::{VariableFlags, VariableModifiers}; use crate::regex::Regex; @@ -30,7 +30,7 @@ pub struct Variable { pub literals: Vec>, /// Flags related to variable modifiers. - flags: VariableFlags, + flags: Flags, /// Type of matching for the variable. matcher_type: MatcherType, @@ -44,6 +44,16 @@ pub struct Variable { non_wide_regex: Option, } +#[derive(Copy, Clone, Debug)] +// Completely useless lint +#[allow(clippy::struct_excessive_bools)] +struct Flags { + fullword: bool, + ascii: bool, + wide: bool, + nocase: bool, +} + #[derive(Debug)] enum MatcherType { /// The literals cover entirely the variable. 
@@ -85,8 +95,8 @@ pub(crate) fn compile_variable(decl: VariableDeclaration) -> Result Result { if case_insensitive { - modifiers.flags.insert(VariableFlags::NOCASE); + modifiers.nocase = true; } - regex::compile_regex(&ast, case_insensitive, dot_all, modifiers.flags) + regex::compile_regex(&ast, case_insensitive, dot_all, &modifiers) } VariableDeclarationValue::HexString(hex_string) => { // Fullword and wide is not compatible with hex strings - modifiers.flags.remove(VariableFlags::FULLWORD); - modifiers.flags.remove(VariableFlags::WIDE); + modifiers.fullword = false; + modifiers.wide = false; if hex_string::can_use_only_literals(&hex_string) { Ok(CompiledVariable { @@ -115,7 +125,7 @@ pub(crate) fn compile_variable(decl: VariableDeclaration) -> Result Result Ok(Variable { name, - is_private: modifiers.flags.contains(VariableFlags::PRIVATE), + is_private: modifiers.private, literals, - flags: modifiers.flags, + flags: Flags { + fullword: modifiers.fullword, + ascii: modifiers.ascii, + wide: modifiers.wide, + nocase: modifiers.nocase, + }, matcher_type, non_wide_regex, }), @@ -150,8 +165,8 @@ struct CompiledVariable { fn compile_bytes(value: Vec, modifiers: &VariableModifiers) -> CompiledVariable { let mut literals = Vec::with_capacity(2); - if modifiers.flags.contains(VariableFlags::WIDE) { - if modifiers.flags.contains(VariableFlags::ASCII) { + if modifiers.wide { + if modifiers.ascii { literals.push(string_to_wide(&value)); literals.push(value); } else { @@ -161,9 +176,9 @@ fn compile_bytes(value: Vec, modifiers: &VariableModifiers) -> CompiledVaria literals.push(value); } - if modifiers.flags.contains(VariableFlags::XOR) { + if let Some(xor_range) = modifiers.xor { // For each literal, for each byte in the xor range, build a new literal - let xor_range = modifiers.xor_range.0..=modifiers.xor_range.1; + let xor_range = xor_range.0..=xor_range.1; let xor_range_len = xor_range.len(); // modifiers.xor_range.1.saturating_sub(modifiers.xor_range.0) + 1; let mut 
new_literals: Vec> = Vec::with_capacity(literals.len() * xor_range_len); for lit in literals { @@ -178,27 +193,25 @@ fn compile_bytes(value: Vec, modifiers: &VariableModifiers) -> CompiledVaria }; } - if modifiers.flags.contains(VariableFlags::BASE64) - || modifiers.flags.contains(VariableFlags::BASE64WIDE) - { + if let Some(base64) = &modifiers.base64 { let mut old_literals = Vec::with_capacity(literals.len() * 3); std::mem::swap(&mut old_literals, &mut literals); - if modifiers.flags.contains(VariableFlags::BASE64) { + if base64.ascii { for lit in &old_literals { for offset in 0..=2 { - if let Some(lit) = encode_base64(lit, &modifiers.base64_alphabet, offset) { - if modifiers.flags.contains(VariableFlags::BASE64WIDE) { + if let Some(lit) = encode_base64(lit, &base64.alphabet, offset) { + if base64.wide { literals.push(string_to_wide(&lit)); } literals.push(lit); } } } - } else if modifiers.flags.contains(VariableFlags::BASE64WIDE) { + } else { for lit in &old_literals { for offset in 0..=2 { - if let Some(lit) = encode_base64(lit, &modifiers.base64_alphabet, offset) { + if let Some(lit) = encode_base64(lit, &base64.alphabet, offset) { literals.push(string_to_wide(&lit)); } } @@ -222,7 +235,7 @@ impl Variable { pub fn confirm_ac_literal(&self, mem: &[u8], mat: &Range, literal_index: usize) -> bool { let literal = &self.literals[literal_index]; - if self.flags.contains(VariableFlags::NOCASE) { + if self.flags.nocase { if !literal.eq_ignore_ascii_case(&mem[mat.start..mat.end]) { return false; } @@ -312,7 +325,7 @@ impl Variable { } fn validate_and_update_match(&self, mem: &[u8], mat: Range) -> Option> { - if self.flags.contains(VariableFlags::FULLWORD) && !check_fullword(mem, &mat, self.flags) { + if self.flags.fullword && !check_fullword(mem, &mat, self.flags) { return None; } @@ -324,13 +337,13 @@ impl Variable { } /// Check the match respects a possible fullword modifier for the variable. 
-fn check_fullword(mem: &[u8], mat: &Range, flags: VariableFlags) -> bool { +fn check_fullword(mem: &[u8], mat: &Range, flags: Flags) -> bool { // TODO: We need to know if the match is done on an ascii or wide string to properly check for // fullword constraints. This is done in a very ugly way, by going through the match. // A better way would be to know which alternation in the match was found. let mut match_is_wide = false; - if flags.contains(VariableFlags::WIDE) { + if flags.wide { match_is_wide = is_match_wide(mat, mem); if match_is_wide { if mat.start > 1 @@ -347,7 +360,7 @@ fn check_fullword(mem: &[u8], mat: &Range, flags: VariableFlags) -> bool } } } - if flags.contains(VariableFlags::ASCII) && !match_is_wide { + if flags.ascii && !match_is_wide { if mat.start > 0 && mem[mat.start - 1].is_ascii_alphanumeric() { return false; } diff --git a/boreal/src/compiler/variable/regex.rs b/boreal/src/compiler/variable/regex.rs index 04680fe4..2d803476 100644 --- a/boreal/src/compiler/variable/regex.rs +++ b/boreal/src/compiler/variable/regex.rs @@ -1,5 +1,5 @@ use boreal_parser::regex::{AssertionKind, Node}; -use boreal_parser::VariableFlags; +use boreal_parser::VariableModifiers; use crate::regex::{regex_ast_to_string, visit, Regex, VisitAction, Visitor}; @@ -16,9 +16,9 @@ pub(super) fn compile_regex( ast: &Node, mut case_insensitive: bool, dot_all: bool, - flags: VariableFlags, + modifiers: &VariableModifiers, ) -> Result { - if flags.contains(VariableFlags::NOCASE) { + if modifiers.nocase { case_insensitive = true; } @@ -36,7 +36,7 @@ pub(super) fn compile_regex( let matcher_type = if use_ac { let pre = match pre_ast { Some(ast) => { - let (pre, has_ww_boundaries) = convert_ast_to_string_with_flags(&ast, flags); + let (pre, has_ww_boundaries) = convert_ast_to_string_with_flags(&ast, modifiers); has_wide_word_boundaries |= has_ww_boundaries; Some(pre) } @@ -44,26 +44,26 @@ pub(super) fn compile_regex( }; let post = match post_ast { Some(ast) => { - let (post, 
has_ww_boundaries) = convert_ast_to_string_with_flags(&ast, flags); + let (post, has_ww_boundaries) = convert_ast_to_string_with_flags(&ast, modifiers); has_wide_word_boundaries |= has_ww_boundaries; Some(post) } None => None, }; - apply_ascii_wide_flags_on_literals(&mut literals, flags); + apply_ascii_wide_flags_on_literals(&mut literals, modifiers); MatcherType::Atomized { left_validator: compile_validator(pre, case_insensitive, dot_all)?, right_validator: compile_validator(post, case_insensitive, dot_all)?, } } else { - let (expr, has_ww_boundaries) = convert_ast_to_string_with_flags(ast, flags); + let (expr, has_ww_boundaries) = convert_ast_to_string_with_flags(ast, modifiers); has_wide_word_boundaries |= has_ww_boundaries; if literals.iter().any(|lit| lit.len() < 2) { literals.clear(); } else { - apply_ascii_wide_flags_on_literals(&mut literals, flags); + apply_ascii_wide_flags_on_literals(&mut literals, modifiers); } MatcherType::Raw(compile_regex_expr(&expr, case_insensitive, dot_all)?) @@ -128,12 +128,12 @@ fn compile_validator( } } -fn apply_ascii_wide_flags_on_literals(literals: &mut Vec>, flags: VariableFlags) { - if !flags.contains(VariableFlags::WIDE) { +fn apply_ascii_wide_flags_on_literals(literals: &mut Vec>, modifiers: &VariableModifiers) { + if !modifiers.wide { return; } - if flags.contains(VariableFlags::ASCII) { + if modifiers.ascii { let wide_literals: Vec<_> = literals.iter().map(|v| widen_literal(v)).collect(); literals.extend(wide_literals); } else { @@ -153,11 +153,11 @@ fn widen_literal(literal: &[u8]) -> Vec { } /// Convert the AST of a regex variable to a string, taking into account variable modifiers. 
-fn convert_ast_to_string_with_flags(ast: &Node, flags: VariableFlags) -> (String, bool) { - if flags.contains(VariableFlags::WIDE) { +fn convert_ast_to_string_with_flags(ast: &Node, modifiers: &VariableModifiers) -> (String, bool) { + if modifiers.wide { let (wide_ast, has_wide_word_boundaries) = visit(ast, AstWidener::new()); - let expr = if flags.contains(VariableFlags::ASCII) { + let expr = if modifiers.ascii { format!( "{}|{}", regex_ast_to_string(ast), diff --git a/boreal/tests/it/libyara_compat/rules.rs b/boreal/tests/it/libyara_compat/rules.rs index 2692f35c..7434590f 100644 --- a/boreal/tests/it/libyara_compat/rules.rs +++ b/boreal/tests/it/libyara_compat/rules.rs @@ -1070,7 +1070,7 @@ fn test_strings() { condition: $a }", - "mem:3:23: error: string modifier XOR appears multiple times", + "mem:3:23: error: string modifier xor appears multiple times", ); // We should have no matches here because we are not generating the wide