Skip to content

Commit

Permalink
feat: rework exports from boreal-parser
Browse files Browse the repository at this point in the history
Instead of re-exporting by hand in src/lib.rs, just expose the modules.
This avoids forgetting to export types, which happens quite a lot.
  • Loading branch information
vthib committed Aug 2, 2023
1 parent adb1dc4 commit 3e8682b
Show file tree
Hide file tree
Showing 23 changed files with 109 additions and 117 deletions.
3 changes: 2 additions & 1 deletion boreal-parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! Parsing error types.
use std::num::ParseIntError;
use std::ops::Range;

Expand Down Expand Up @@ -163,7 +164,7 @@ impl ParseError<Input<'_>> for Error {
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind {
pub(crate) enum ErrorKind {
/// A base64 modifier alphabet has an invalid length.
///
/// The length must be 64.
Expand Down
5 changes: 3 additions & 2 deletions boreal-parser/src/expression/boolean_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use nom::{
sequence::preceded,
};

use crate::{error::ErrorKind, Error, Regex};
use crate::error::{Error, ErrorKind};
use crate::regex::Regex;

use super::{
super::{
Expand All @@ -24,7 +25,7 @@ use super::{
};

/// parse or operator
pub fn boolean_expression(mut input: Input) -> ParseResult<Expression> {
pub(crate) fn boolean_expression(mut input: Input) -> ParseResult<Expression> {
// Expression parsing involves multiple recursive paths, making it quite complex.
// There are however a few observations we can make:
// - only two combinators receive recursive calls: this one and `primary_expression`.
Expand Down
7 changes: 3 additions & 4 deletions boreal-parser/src/expression/for_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,11 +351,10 @@ fn rule_enum_element(input: Input) -> ParseResult<SetElement> {
#[cfg(test)]
mod tests {
use super::*;
use crate::{
expression::{ExpressionKind, Identifier, IdentifierOperation},
test_helpers::{parse, parse_err, test_public_type},
IdentifierOperationType,
use crate::expression::{
ExpressionKind, Identifier, IdentifierOperation, IdentifierOperationType,
};
use crate::test_helpers::{parse, parse_err, test_public_type};

#[test]
fn test_for_selection() {
Expand Down
2 changes: 1 addition & 1 deletion boreal-parser/src/expression/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use nom::{
};

use super::{Expression, Identifier, IdentifierOperation};
use crate::expression::IdentifierOperationType;
use crate::nom_recipes::{not_followed, rtrim};
use crate::string::identifier as raw_identifier;
use crate::types::{Input, ParseResult};
use crate::IdentifierOperationType;

use super::boolean_expression::boolean_expression;
use super::primary_expression::primary_expression;
Expand Down
4 changes: 3 additions & 1 deletion boreal-parser/src/expression/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! Types related to the condition part of YARA rules.
use std::ops::Range;

mod boolean_expression;
Expand All @@ -10,7 +11,7 @@ mod string_expression;

use crate::regex::Regex;

pub(super) use boolean_expression::boolean_expression as expression;
pub(crate) use boolean_expression::boolean_expression as expression;

const MAX_EXPR_RECURSION: usize = 20;

Expand Down Expand Up @@ -423,6 +424,7 @@ pub struct VariableSet {
pub elements: Vec<SetElement>,
}

/// Element of a set.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SetElement {
/// Name of the element.
Expand Down
17 changes: 6 additions & 11 deletions boreal-parser/src/expression/primary_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ use nom::{

use super::{expression, identifier, read_integer, string_expression, Expression, ExpressionKind};
use crate::{
error::ErrorKind,
error::{Error, ErrorKind},
nom_recipes::{rtrim, textual_tag as ttag},
number, regex, string,
types::{Input, ParseResult},
Error,
};

/// parse | operator
pub fn primary_expression(mut input: Input) -> ParseResult<Expression> {
pub(crate) fn primary_expression(mut input: Input) -> ParseResult<Expression> {
let start = input.pos();

if input.expr_recursion_counter >= super::MAX_EXPR_RECURSION {
Expand Down Expand Up @@ -257,17 +256,13 @@ where
mod tests {
use super::super::Identifier;
use super::{primary_expression as pe, Expression, ExpressionKind as Expr};
use crate::error::ErrorKind;
use crate::error::{Error, ErrorKind};
use crate::expression::ReadIntegerType;
use crate::expression::MAX_EXPR_RECURSION;
use crate::regex::{AssertionKind, Node, RepetitionKind};
use crate::regex::{AssertionKind, Node, Regex, RepetitionKind};
use crate::test_helpers::parse_err_type;
use crate::test_helpers::{parse, parse_check, parse_err};
use crate::types::Input;
use crate::Error;
use crate::{
expression::ReadIntegerType,
regex::Regex,
test_helpers::{parse, parse_check, parse_err},
};
use std::ops::Range;

#[test]
Expand Down
12 changes: 5 additions & 7 deletions boreal-parser/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Parse yara rules.
//! Types related to YARA files.
use std::ops::Range;

use nom::branch::alt;
Expand All @@ -8,7 +8,7 @@ use nom::combinator::map;
use nom::sequence::delimited;
use nom::{combinator::cut, sequence::preceded};

use crate::Rule;
use crate::rule::Rule;

use super::rule::rule;
use super::{
Expand Down Expand Up @@ -63,7 +63,7 @@ pub struct Include {
///
/// If the input cannot be parsed properly and entirely as a list
/// of yara rules, an error is returned.
pub fn parse_yara_file(input: Input) -> ParseResult<YaraFile> {
pub(crate) fn parse_yara_file(input: Input) -> ParseResult<YaraFile> {
let (mut input, _) = ltrim(input)?;

let mut file = YaraFile {
Expand Down Expand Up @@ -129,10 +129,8 @@ fn import(input: Input) -> ParseResult<Import> {
#[cfg(test)]
mod tests {
use super::*;
use crate::{
test_helpers::{parse, parse_err, test_public_type},
Expression, ExpressionKind,
};
use crate::expression::{Expression, ExpressionKind};
use crate::test_helpers::{parse, parse_err, test_public_type};

#[test]
fn test_parse_yara_file() {
Expand Down
21 changes: 5 additions & 16 deletions boreal-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,25 +42,14 @@
// - The returned input is right-trimmed
// The [`nom_recipes::rtrim`] function is provided to make this easier.

mod error;
pub use error::Error;
mod expression;
pub use expression::{
Expression, ExpressionKind, ForIterator, ForSelection, Identifier, IdentifierOperation,
IdentifierOperationType, ReadIntegerType, RuleSet, VariableSet,
};
mod file;
pub use file::{YaraFile, YaraFileComponent};
pub mod error;
pub mod expression;
pub mod file;
pub mod hex_string;
mod nom_recipes;
mod number;
pub mod regex;
pub use regex::Regex;
mod rule;
pub use rule::{
Metadata, Rule, VariableDeclaration, VariableDeclarationValue, VariableModifierBase64,
VariableModifiers,
};
pub mod rule;
mod string;
mod types;

Expand All @@ -70,7 +59,7 @@ mod types;
///
/// Returns an error if the parsing fails, or if there is
/// trailing data in the file that has not been parsed.
pub fn parse(input: &str) -> Result<YaraFile, Error> {
pub fn parse(input: &str) -> Result<file::YaraFile, error::Error> {
use nom::Finish;

let input = types::Input::new(input);
Expand Down
12 changes: 6 additions & 6 deletions boreal-parser/src/nom_recipes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use super::error::{Error, ErrorKind};
use super::types::{Input, ParseResult};

/// Right trim after the given parser.
pub fn rtrim<'a, F, O>(mut inner: F) -> impl FnMut(Input<'a>) -> ParseResult<'a, O>
pub(crate) fn rtrim<'a, F, O>(mut inner: F) -> impl FnMut(Input<'a>) -> ParseResult<'a, O>
where
F: Parser<Input<'a>, O, Error> + 'a,
{
Expand All @@ -33,7 +33,7 @@ where
}

/// Left trim the input.
pub fn ltrim(mut input: Input) -> ParseResult<()> {
pub(crate) fn ltrim(mut input: Input) -> ParseResult<()> {
loop {
match alt((
multiline_comment,
Expand All @@ -49,7 +49,7 @@ pub fn ltrim(mut input: Input) -> ParseResult<()> {
}

/// Accepts a first parser, only if the second one does not match afterwards
pub fn not_followed<'a, F, G, OF, OG>(
pub(crate) fn not_followed<'a, F, G, OF, OG>(
mut f: F,
mut g: G,
) -> impl FnMut(Input<'a>) -> ParseResult<'a, OF>
Expand All @@ -70,7 +70,7 @@ where
}

/// Accepts a single character if the passed function returns true on it.
pub fn take_one<F>(f: F) -> impl for<'a> Fn(Input<'a>) -> ParseResult<'a, char>
pub(crate) fn take_one<F>(f: F) -> impl for<'a> Fn(Input<'a>) -> ParseResult<'a, char>
where
F: Fn(char) -> bool,
{
Expand All @@ -89,7 +89,7 @@ where
/// following character is not alphanumeric.
/// This avoids recognizing a tag inside a word, for example, recognizing
/// `foo` in `foobar`.
pub fn textual_tag(
pub(crate) fn textual_tag(
tag: &'static str,
) -> impl for<'a> Fn(Input<'a>) -> ParseResult<'a, &'static str> {
move |input: Input| {
Expand Down Expand Up @@ -133,7 +133,7 @@ fn singleline_comment(input: Input) -> ParseResult<()> {
///
/// This allows using the starting input to generate a proper span
/// for the error.
pub fn map_res<'a, O1, O2, F, G>(
pub(crate) fn map_res<'a, O1, O2, F, G>(
mut parser: F,
mut f: G,
) -> impl FnMut(Input<'a>) -> ParseResult<O2>
Expand Down
4 changes: 2 additions & 2 deletions boreal-parser/src/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ fn octal_number(input: Input) -> ParseResult<i64> {
/// - hexadecimal with 0x prefix,
/// - octal with 0o prefix,
/// - decimal with optional KB/MB suffix.
pub fn number(input: Input) -> ParseResult<i64> {
pub(crate) fn number(input: Input) -> ParseResult<i64> {
// XXX: decimal number must be last, otherwise, it would parse the '0'
// in the '0x'/'0o' prefix.
alt((hexadecimal_number, octal_number, decimal_number))(input)
Expand All @@ -104,7 +104,7 @@ pub fn number(input: Input) -> ParseResult<i64> {
///
/// Equivalent to the _DOUBLE_ lexical pattern in libyara.
/// This function matches the pattern `/\d+\.\d+/`.
pub fn double(input: Input) -> ParseResult<f64> {
pub(crate) fn double(input: Input) -> ParseResult<f64> {
let (input, payload) = rtrim(recognize(tuple((digit1, char('.'), digit1))))(input)?;

// Safety: this cannot fail, we are parsing `[0-9]+ '.' [0-9]+` which is guaranteed to
Expand Down
16 changes: 8 additions & 8 deletions boreal-parser/src/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ use super::{
expression::{self, Expression},
hex_string,
nom_recipes::{map_res, rtrim, textual_tag as ttag},
number, regex, string,
number, regex,
regex::Regex,
string,
types::{Input, ParseResult, Position},
Regex,
};

/// A Yara rule.
Expand Down Expand Up @@ -66,8 +67,11 @@ pub struct RuleTag {
/// Value associated with a metadata key.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MetadataValue {
/// Bytestring variant.
Bytes(Vec<u8>),
/// Integer variant.
Integer(i64),
/// Boolean variant.
Boolean(bool),
}

Expand Down Expand Up @@ -114,10 +118,7 @@ pub struct VariableModifiers {
/// Xor modifier, providing the range.
pub xor: Option<(u8, u8)>,

/// Base64 modifier.alphabet.
///
/// This is only applicable if `flags` contains [`VariableFlags::BASE64`]
/// or [`VariableFlags::BASE64WIDE`].
/// Base64 modifier.
pub base64: Option<VariableModifierBase64>,
}

Expand Down Expand Up @@ -150,7 +151,7 @@ pub struct VariableDeclaration {
/// Parse a rule
///
/// Related to the `rule` pattern in `grammar.y` in libyara.
pub fn rule(mut input: Input) -> ParseResult<Rule> {
pub(crate) fn rule(mut input: Input) -> ParseResult<Rule> {
let mut is_private = false;
let mut is_global = false;

Expand Down Expand Up @@ -565,7 +566,6 @@ mod tests {
use crate::expression::{Expression, ExpressionKind, ForSelection, VariableSet};
use crate::hex_string::{Mask, Token};
use crate::test_helpers::test_public_type;
use crate::Regex;

use super::super::test_helpers::{parse, parse_err};
use super::*;
Expand Down
14 changes: 7 additions & 7 deletions boreal-parser/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,40 +41,40 @@ fn string_identifier_no_rtrim(input: Input) -> ParseResult<String> {
/// This is equivalent to the `_STRING_IDENTIFIER_` lexical pattern in
/// libyara.
/// Roughly equivalent to `$[a-zA-Z0-9_]*`.
pub fn string_identifier(input: Input) -> ParseResult<String> {
pub(crate) fn string_identifier(input: Input) -> ParseResult<String> {
rtrim(string_identifier_no_rtrim)(input)
}

/// Parse a string identifier with an optional trailing wildcard.
///
/// This is equivalent to
/// `_STRING_IDENTIFIER_ | _STRING_IDENTIFIER_WITH_WILDCARD_` in libyara.
pub fn string_identifier_with_wildcard(input: Input) -> ParseResult<(String, bool)> {
pub(crate) fn string_identifier_with_wildcard(input: Input) -> ParseResult<(String, bool)> {
rtrim(pair(
string_identifier_no_rtrim,
map(opt(char('*')), |v| v.is_some()),
))(input)
}

/// Parse a string count, roughly equivalent to `#[a-zA-Z0-9_]*`.
pub fn count(input: Input) -> ParseResult<String> {
pub(crate) fn count(input: Input) -> ParseResult<String> {
rtrim(preceded(char('#'), cut(identifier_contents)))(input)
}

/// Parse a string offset, roughly equivalent to `@[a-zA-Z0-9_]*`.
pub fn offset(input: Input) -> ParseResult<String> {
pub(crate) fn offset(input: Input) -> ParseResult<String> {
rtrim(preceded(char('@'), cut(identifier_contents)))(input)
}

/// Parse a string length, roughly equivalent to `![a-zA-Z0-9_]*`.
pub fn length(input: Input) -> ParseResult<String> {
pub(crate) fn length(input: Input) -> ParseResult<String> {
rtrim(preceded(char('!'), cut(identifier_contents)))(input)
}

/// Parse an identifier.
///
/// This is roughly equivalent to `[a-zA-Z_][a-zA-Z0-9_]*`.
pub fn identifier(input: Input) -> ParseResult<String> {
pub(crate) fn identifier(input: Input) -> ParseResult<String> {
rtrim(map(
recognize(tuple((
take_one(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '_')),
Expand All @@ -91,7 +91,7 @@ pub fn identifier(input: Input) -> ParseResult<String> {
/// patterns `\t`, `\r`, `\n`, `\"`, `\\`, and `\x[0-9a-fA-F]{2}`.
///
/// This parser allows non-ASCII bytes, hence returning a byte string.
pub fn quoted(input: Input) -> ParseResult<Vec<u8>> {
pub(crate) fn quoted(input: Input) -> ParseResult<Vec<u8>> {
rtrim(quoted_no_rtrim)(input)
}

Expand Down
Loading

0 comments on commit 3e8682b

Please sign in to comment.