Skip to content

Commit

Permalink
Added ExprMatcher
Browse files Browse the repository at this point in the history
  • Loading branch information
maoueh committed Aug 26, 2024
1 parent 6e9e1d5 commit b3d100c
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 55 deletions.
19 changes: 16 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

- Added [ExprMatcher](https://docs.rs/substreams/latest/substreams/struct.ExprMatcher.html), and constructor(s) [expr_matcher](https://docs.rs/substreams/latest/substreams/fn.expr_matcher.html) and [ExprMatcher::new](https://docs.rs/substreams/latest/substreams/struct.ExprMatcher.html#method.new). This can be used to parse an expression once and re-use it to run multiple [matches_keys](https://docs.rs/substreams/latest/substreams/struct.ExprMatcher.html#method.matches_keys) against different keys:

```rust
let matcher = substreams::expr_matcher(&query);

transactions.flat_map(|trx| {
trx.instructions()
.filter(|view| matcher.matches_keys(&vec![format!("program:{}", view.program_id().to_string())]))
});
```

## 0.5.21

- Add skip_empty_output intrinsic (requires substreams server version v1.9.0 or later)
Expand Down Expand Up @@ -34,10 +47,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## 0.5.14

- Add index keys protobuf in substreams crate
- Add `matches_keys_in_parsed_expr` function returning a `bool`. It returns `true`, if the set of `keys` provided, matches the `expression`.
- Add index keys protobuf in substreams crate
- Add `matches_keys_in_parsed_expr` function returning a `bool`. It returns `true`, if the set of `keys` provided, matches the `expression`.
(Ex: `expression: (key1 || key2)`, if the set of keys contains key1 or contains key2, `matches_keys_in_parsed_expr(keys, expression)` returns `true`, else returns `false`)

## 0.5.13

Added support for specifying `let mut <param>: <Type>` when using the `#[substreams::handlers::map]` macros, this enables in-place trimming of received data.
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[toolchain]
channel = "1.71"
components = [ "rustfmt" ]
targets = [ "wasm32-unknown-unknown" ]
channel = "1.80"
components = ["rustfmt"]
targets = ["wasm32-unknown-unknown"]
162 changes: 116 additions & 46 deletions substreams/src/expr_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,35 @@ use anyhow::{Context, Error};
use pest::{iterators::Pair, Parser};
use pest_derive::Parser;

/// An expression matcher that can be used to match keys against a given expression.
///
/// Create an [ExprMatcher] by calling [expr_matcher] with the input expression,
/// or by using [ExprMatcher::new] directly.
///
/// The matcher can then be re-used to match multiple sets of keys against the same
/// expression, re-using the already "parsed" state of the expression instead of
/// parsing it again for every match.
pub struct ExprMatcher<'a> {
// Parsed form of the expression; it borrows from the input string for `'a`.
pair: Pair<'a, Rule>,
}

impl<'a> ExprMatcher<'a> {
    /// Parses `input` once and builds a matcher that holds the parsed expression.
    ///
    /// # Errors
    ///
    /// Returns an error when `input` cannot be parsed as an expression.
    pub fn new(input: &'a str) -> Result<Self, Error> {
        let pair = parsing(input)?;
        Ok(Self { pair })
    }

    /// Evaluates the expression against `keys`, returning `true` when the keys
    /// satisfy the expression and `false` otherwise.
    pub fn matches_keys<K: AsRef<str>>(&self, keys: &[K]) -> bool {
        // `apply_rule` takes the pair by value, so every evaluation runs on its own clone
        // and the stored pair stays available for the next call.
        let parsed = self.pair.clone();
        apply_rule(parsed, keys)
    }
}

/// Builds an [ExprMatcher] from the given input, for callers that treat an invalid
/// expression as a fatal error. The returned matcher can be re-used across the whole
/// block to match multiple elements.
///
/// # Panics
///
/// Panics if `input` cannot be parsed. Use [ExprMatcher::new] to receive a `Result` instead.
pub fn expr_matcher(input: &'_ str) -> ExprMatcher<'_> {
    let built = ExprMatcher::new(input);
    built.expect("creating expression matcher failed")
}

/// Parser for key expressions, derived by `pest_derive` from the grammar in
/// `expr_parser_rule.pest`.
// NOTE(review): the `Rule` type used throughout this file is presumably generated by this derive — confirm.
#[derive(Parser)]
#[grammar = "expr_parser_rule.pest"]
struct EParser;
Expand All @@ -12,16 +41,19 @@ fn parsing(input: &str) -> Result<Pair<Rule>, Error> {

match pairs.into_iter().next() {
Some(pair) => Ok(pair),
None => Err(anyhow::Error::msg("no pairs found in input"))
None => Err(anyhow::Error::msg("no pairs found in input")),
}
}

pub fn matches_keys_in_parsed_expr<K: AsRef<str>, I: AsRef<str>>(keys: &[K], input: I) -> Result<bool, Error> {
let successful_parse = parsing(input.as_ref()).context("parsing expression")?;
/// Parses `input` as an expression and evaluates it against `keys`.
///
/// Returns `Ok(true)` when the set of `keys` matches the expression and `Ok(false)`
/// otherwise. For example, with the expression `key1 || key2`, the result is `true` if
/// `keys` contains `key1` or contains `key2`.
///
/// The expression is parsed on every call; use [ExprMatcher] to parse it once and
/// match several sets of keys against it.
///
/// # Errors
///
/// Returns an error, with the context "parsing expression", when `input` cannot be parsed.
pub fn matches_keys_in_parsed_expr<K: AsRef<str>, I: AsRef<str>>(
keys: &[K],
input: I,
) -> Result<bool, Error> {
let successful_parse = parsing(input.as_ref()).context("parsing expression")?;
Ok(apply_rule(successful_parse, keys))
}

fn apply_rule<K: AsRef<str>>(pair: Pair<Rule>, keys: &[K]) -> bool {
fn apply_rule<K: AsRef<str>>(pair: Pair<Rule>, keys: &[K]) -> bool {
match pair.as_rule() {
Rule::expression => {
let inner_pair = pair.into_inner().next().unwrap();
Expand All @@ -33,14 +65,14 @@ fn apply_rule<K: AsRef<str>>(pair: Pair<Rule>, keys: &[K]) -> bool {
result = result || apply_rule(inner_pair, keys);
}
return result;
},
}
Rule::and => {
let mut result = true;
for inner_pair in pair.into_inner() {
result = result && apply_rule(inner_pair, keys);
}
return result;
},
}
Rule::value => {
let inner_pair = pair.into_inner().next().unwrap();
return apply_rule(inner_pair, keys);
Expand All @@ -49,13 +81,19 @@ fn apply_rule<K: AsRef<str>>(pair: Pair<Rule>, keys: &[K]) -> bool {
return keys.iter().any(|key| key.as_ref() == pair.as_str());
}
Rule::singleQuoteKeyTerm => {
return keys.iter().any(|key| key.as_ref() == pair.as_str().trim_matches('\''));
return keys
.iter()
.any(|key| key.as_ref() == pair.as_str().trim_matches('\''));
}
Rule::doubleQuoteKeyTerm => {
return keys.iter().any(|key| key.as_ref() == pair.as_str().trim_matches('"'));
return keys
.iter()
.any(|key| key.as_ref() == pair.as_str().trim_matches('"'));
}
_ => {
panic!("Unexpected rule encountered")
}
_ => {panic!("Unexpected rule encountered")}
}
}
}

#[cfg(test)]
Expand All @@ -70,32 +108,32 @@ fn expression_to_string(parsing: Pair<Rule>) -> String {
let mut result = String::new();
result.push_str("[");
for inner_pair in parsing.into_inner() {
result.push_str(&expression_to_string(inner_pair));
result.push_str(&expression_to_string(inner_pair));
result.push_str("||");
}
}

if result.ends_with("||") {
result.truncate(result.len() - 2);
result.truncate(result.len() - 2);
}

result.push_str("]");
return result;
},
}
Rule::and => {
let mut result = String::new();
result.push_str("<");
for inner_pair in parsing.into_inner() {
result.push_str(&expression_to_string(inner_pair));
result.push_str(&expression_to_string(inner_pair));
result.push_str("&&");
}
}

if result.ends_with("&&") {
result.truncate(result.len() - 2);
result.truncate(result.len() - 2);
}

result.push_str(">");
return result;
},
}
Rule::value => {
let inner_pair = parsing.into_inner().next().unwrap();
return expression_to_string(inner_pair);
Expand All @@ -109,16 +147,30 @@ fn expression_to_string(parsing: Pair<Rule>) -> String {
Rule::doubleQuoteKeyTerm => {
return parsing.as_str().trim_matches('\"').to_string();
}
_ => {panic!("Unexpected rule encountered")}
_ => {
panic!("Unexpected rule encountered")
}
}
}


#[cfg(test)]
mod tests {
use rstest::rstest;
use super::*;
static TEST_KEYS: &[&str] = &["test", "test1", "test2", "test3", "test4", "test5", "test 6", "test.7", "test:8", "test_9", "test*19z_|", "type:wasm-MarketUpdated"];
use rstest::rstest;
static TEST_KEYS: &[&str] = &[
"test",
"test1",
"test2",
"test3",
"test4",
"test5",
"test 6",
"test.7",
"test:8",
"test_9",
"test*19z_|",
"type:wasm-MarketUpdated",
];

#[rstest]
#[case(TEST_KEYS, "test", true)]
Expand All @@ -127,7 +179,6 @@ mod tests {
#[case(TEST_KEYS, "'test_6' && test3", false)]
#[case(TEST_KEYS, "\"test 6\"|| test7", true)]
#[case(TEST_KEYS, "\"test 6\" && test3", true)]

#[case(TEST_KEYS, "test.7", true)]
#[case(TEST_KEYS, "type:wasm-MarketUpdated", true)]
#[case(TEST_KEYS, "type:was-mMarketUpdated", false)]
Expand All @@ -141,58 +192,73 @@ mod tests {
#[case(TEST_KEYS, "test10 && test:8", false)]
#[case(TEST_KEYS, "(test10 && test_9) || (test.7 && test:8)", true)]
#[case(TEST_KEYS, "(test10 && test_9) || (test.7 && test*19z_|)", true)]
#[case(TEST_KEYS, "(test10 && test_9) || test*19z || (test.7 && test*19z_|)", true)]
#[case(TEST_KEYS, "(test10 && test_9) || test*19z && (test.7 && test*19z_|)", false)]

#[case(
TEST_KEYS,
"(test10 && test_9) || test*19z || (test.7 && test*19z_|)",
true
)]
#[case(
TEST_KEYS,
"(test10 && test_9) || test*19z && (test.7 && test*19z_|)",
false
)]
#[case(TEST_KEYS, "test1 || test", true)]
#[case(TEST_KEYS, "test1 || test6", true)]
#[case(TEST_KEYS, "test6 || test7", false)]

#[case(TEST_KEYS, "test1 || test || test2", true)]
#[case(TEST_KEYS, "test1 || test6 || test7", true)]
#[case(TEST_KEYS, "test6 || test7 || test8", false)]

#[case(TEST_KEYS, "test1 && test", true)]
#[case(TEST_KEYS, "test1 && test6", false)]
#[case(TEST_KEYS, "test6 && test7", false)]

#[case(TEST_KEYS, "test1 && test && test2", true)]
#[case(TEST_KEYS, "test1&& test2 &&test7", false)]
#[case(TEST_KEYS, "test6 &&test7 && test8", false)]

#[case(TEST_KEYS, "test1 test", true)]
#[case(TEST_KEYS, "test1 test6", false)]
#[case(TEST_KEYS, "test6 test7", false)]

#[case(TEST_KEYS, "(test1)", true)]
#[case(TEST_KEYS, "(test1 test6)", false)]

#[case(TEST_KEYS, "test1 test2 ", true)]
#[case(TEST_KEYS, "test1 && test2 ", true)]
#[case(TEST_KEYS, "test1 && test6", false)]
#[case(TEST_KEYS, "(test1 || test3) && test6 ", false)]
#[case(TEST_KEYS, "(test1 || test6 || test7 ) && (test4 || test5) && test3 ", true)]

#[case(TEST_KEYS, "(test1 || test6 || test7) && (test4 || test5) && test3 ", true)]
#[case(TEST_KEYS, "(test1 && test6 && test7) || (test4 && test5) || test3 ", true)]
#[case(
TEST_KEYS,
"(test1 || test6 || test7 ) && (test4 || test5) && test3 ",
true
)]
#[case(
TEST_KEYS,
"(test1 || test6 || test7) && (test4 || test5) && test3 ",
true
)]
#[case(
TEST_KEYS,
"(test1 && test6 && test7) || (test4 && test5) || test3 ",
true
)]

fn test_matches_keys_in_parsed_expr(#[case] keys: &[&str], #[case] input: &str, #[case] expected: bool) {
fn test_matches_keys_in_parsed_expr(
#[case] keys: &[&str],
#[case] input: &str,
#[case] expected: bool,
) {
let pair = parsing(input).unwrap();
let expr_as_string = expression_to_string(pair);

let result = matches_keys_in_parsed_expr(keys, input).expect("matching keys in parsed expression");

let result =
matches_keys_in_parsed_expr(keys, input).expect("matching keys in parsed expression");

assert_eq!(result, expected, "This expression ast is {expr_as_string}");
}
}

#[rstest]

// In the current version of the parser, - should not be supported at the beginning of the expression.
#[case("-test", true)]
#[case("'-test'", true)]
#[case("'test-8'", false)]
#[case("test-8", false)]

#[case("'te't'", true)]
#[case("\"te\"st\"", true)]

Expand All @@ -204,6 +270,10 @@ mod tests {
} else {
assert!(pair.is_ok());
}
}
}
}

#[test]
fn it_expr_matcher_matches_keys() {
assert_eq!(expr_matcher("test").matches_keys(TEST_KEYS), true);
}
}
2 changes: 1 addition & 1 deletion substreams/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ pub mod key;
pub mod store;

pub mod expr_parser;
pub use expr_parser::matches_keys_in_parsed_expr;
pub use expr_parser::{expr_matcher, matches_keys_in_parsed_expr, ExprMatcher};

mod operation;

Expand Down

0 comments on commit b3d100c

Please sign in to comment.