From a07c7b6085c0f26833ef7004fda96b402feb01fa Mon Sep 17 00:00:00 2001 From: Boris Zhguchev Date: Tue, 20 Feb 2024 00:55:10 +0100 Subject: [PATCH] 61 regex perf (#62) * add rgex bench * fix inter * init impl * add config * fix complains --------- Co-authored-by: Boris Zhguchev --- CHANGELOG.md | 4 ++ Cargo.toml | 10 +++- README.md | 43 +++++++++++++++ benches/regex_bench.rs | 40 ++++++++++++++ src/lib.rs | 60 ++++++++++++++++---- src/path/config.rs | 16 ++++++ src/path/config/cache.rs | 115 +++++++++++++++++++++++++++++++++++++++ src/path/index.rs | 83 ++++++++++++++++------------ src/path/json.rs | 43 +++++++++++---- src/path/mod.rs | 34 ++++++++---- src/path/top.rs | 43 +++++++++------ 11 files changed, 405 insertions(+), 86 deletions(-) create mode 100644 benches/regex_bench.rs create mode 100644 src/path/config.rs create mode 100644 src/path/config/cache.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index fcc1bdd..d2eec56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,4 +38,8 @@ * **`0.3.5`** * add `!` negation operation in filters * allow using () in filters +* **`0.5`** + * add config for jsonpath + * add an option to add a regex cache for boosting performance + diff --git a/Cargo.toml b/Cargo.toml index 832e1ed..22854dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "jsonpath-rust" description = "The library provides the basic functionality to find the set of the data according to the filtering query." -version = "0.4.0" +version = "0.5.0" authors = ["BorisZhguchev "] edition = "2018" license-file = "LICENSE" @@ -17,6 +17,12 @@ regex = "1" pest = "2.0" pest_derive = "2.0" thiserror = "1.0.50" +lazy_static = "1.4" +once_cell = "1.19.0" [dev-dependencies] -lazy_static = "1.0" +criterion = "0.5.1" + +[[bench]] +name = "regex_bench" +harness = false \ No newline at end of file diff --git a/README.md b/README.md index 8384f80..cc09de2 100644 --- a/README.md +++ b/README.md @@ -389,7 +389,50 @@ fn test() { ** If the value has been modified during the search, there is no way to find a path of a new value. It can happen if we try to find a length() of array, for in stance.** +## Configuration +The JsonPath provides a wat to configure the search by using `JsonPathConfig`. + +```rust +pub fn main() { + let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default())); +} +``` + +### Regex cache +The configuration provides an ability to use a regex cache to improve the [performance](https://github.com/besok/jsonpath-rust/issues/61) + +To instantiate the cache needs to use `RegexCache` enum with the implementation of the trait `RegexCacheInst`. +Default implementation `DefaultRegexCacheInst` uses `Arc>>`. +The pair of Box or Value and config can be used: +```rust +pub fn main(){ + let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default())); + let json = Box::new(json!({ + "author":"abcd(Rees)", + })); + + let _v = (json, cfg).path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]") + .expect("the path is correct"); + + +} +``` +or using `JsonPathFinder` : + +```rust +fn main() { + let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default())); + let finder = JsonPathFinder::from_str_with_cfg( + r#"{"first":{"second":[{"active":1},{"passive":1}]}}"#, + "$.first.second[?(@.active)]", + cfg, + ).unwrap(); + let slice_of_data: Vec<&Value> = finder.find_slice(); + let js = json!({"active":1}); + assert_eq!(slice_of_data, vec![JsonPathValue::Slice(&js, "$.first.second[0]".to_string())]); +} +``` ## The structure diff --git a/benches/regex_bench.rs b/benches/regex_bench.rs new file mode 100644 index 0000000..2b88e7f --- /dev/null +++ b/benches/regex_bench.rs @@ -0,0 +1,40 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use jsonpath_rust::path::config::cache::{DefaultRegexCacheInst, RegexCache}; +use jsonpath_rust::path::config::JsonPathConfig; +use jsonpath_rust::{JsonPathFinder, JsonPathInst, JsonPathQuery}; +use once_cell::sync::Lazy; +use serde_json::{json, Value}; +use std::str::FromStr; + +fn regex_perf_test_with_cache(cfg: JsonPathConfig) { + let json = Box::new(json!({ + "author":"abcd(Rees)", + })); + + let _v = (json, cfg) + .path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]") + .expect("the path is correct"); +} + +fn regex_perf_test_without_cache() { + let json = Box::new(json!({ + "author":"abcd(Rees)", + })); + + let _v = json + .path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]") + .expect("the path is correct"); +} + +pub fn criterion_benchmark(c: &mut Criterion) { + let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default())); + c.bench_function("regex bench without cache", |b| { + b.iter(|| regex_perf_test_without_cache()) + }); + c.bench_function("regex bench with cache", |b| { + b.iter(|| regex_perf_test_with_cache(cfg.clone())) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index aba2969..215f613 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,6 +116,7 @@ use crate::parser::model::JsonPath; use crate::parser::parser::parse_json_path; +use crate::path::config::JsonPathConfig; use crate::path::{json_path_instance, PathInstance}; use serde_json::Value; use std::convert::TryInto; @@ -182,8 +183,12 @@ impl FromStr for JsonPathInst { } impl JsonPathInst { - pub fn find_slice<'a>(&'a self, value: &'a Value) -> Vec> { - json_path_instance(&self.inner, value) + pub fn find_slice<'a>( + &'a self, + value: &'a Value, + cfg: JsonPathConfig, + ) -> Vec> { + json_path_instance(&self.inner, value, cfg) .find(JsonPathValue::from_root(value)) .into_iter() .filter(|v| v.has_value()) @@ -224,6 +229,13 @@ impl JsonPathQuery for Box { } } +impl JsonPathQuery for (Box, JsonPathConfig) { + fn path(self, query: &str) -> Result { + let p = JsonPathInst::from_str(query)?; + Ok(JsonPathFinder::new_with_cfg(self.0, Box::new(p), self.1).find()) + } +} + impl JsonPathQuery for Value { fn path(self, query: &str) -> Result { let p = JsonPathInst::from_str(query)?; @@ -231,6 +243,13 @@ impl JsonPathQuery for Value { } } +impl JsonPathQuery for (Value, JsonPathConfig) { + fn path(self, query: &str) -> Result { + let p = JsonPathInst::from_str(query)?; + Ok(JsonPathFinder::new_with_cfg(Box::new(self.0), Box::new(p), self.1).find()) + } +} + /// just to create a json path value of data /// Example: /// - json_path_value(&json) = `JsonPathValue::Slice(&json)` @@ -294,6 +313,7 @@ type JsPathStr = String; pub(crate) fn jsp_idx(prefix: &str, idx: usize) -> String { format!("{}[{}]", prefix, idx) } + pub(crate) fn jsp_obj(prefix: &str, key: &str) -> String { format!("{}.['{}']", prefix, key) } @@ -337,7 +357,7 @@ impl<'a, Data: Clone + Debug + Default> JsonPathValue<'a, Data> { } impl<'a, Data> JsonPathValue<'a, Data> { - fn only_no_value(input: &Vec>) -> bool { + fn only_no_value(input: &[JsonPathValue<'a, Data>]) -> bool { !input.is_empty() && input.iter().filter(|v| v.has_value()).count() == 0 } fn map_vec(data: Vec<(&'a Data, JsPathStr)>) -> Vec> { @@ -407,12 +427,26 @@ impl<'a, Data> JsonPathValue<'a, Data> { pub struct JsonPathFinder { json: Box, path: Box, + cfg: JsonPathConfig, } impl JsonPathFinder { /// creates a new instance of [JsonPathFinder] pub fn new(json: Box, path: Box) -> Self { - JsonPathFinder { json, path } + JsonPathFinder { + json, + path, + cfg: JsonPathConfig::default(), + } + } + + pub fn new_with_cfg(json: Box, path: Box, cfg: JsonPathConfig) -> Self { + JsonPathFinder { json, path, cfg } + } + + /// sets a cfg with a new one + pub fn set_cfg(&mut self, cfg: JsonPathConfig) { + self.cfg = cfg } /// updates a path with a new one @@ -440,10 +474,15 @@ impl JsonPathFinder { let path = Box::new(JsonPathInst::from_str(path)?); Ok(JsonPathFinder::new(json, path)) } + pub fn from_str_with_cfg(json: &str, path: &str, cfg: JsonPathConfig) -> Result { + let json = serde_json::from_str(json).map_err(|e| e.to_string())?; + let path = Box::new(JsonPathInst::from_str(path)?); + Ok(JsonPathFinder::new_with_cfg(json, path, cfg)) + } /// creates an instance to find a json slice from the json pub fn instance(&self) -> PathInstance { - json_path_instance(&self.path.inner, &self.json) + json_path_instance(&self.path.inner, &self.json, self.cfg.clone()) } /// finds a slice of data in the set json. /// The result is a vector of references to the incoming structure. @@ -494,6 +533,7 @@ impl JsonPathFinder { #[cfg(test)] mod tests { + use crate::path::config::JsonPathConfig; use crate::JsonPathQuery; use crate::JsonPathValue::{NoValue, Slice}; use crate::{jp_v, JsonPathFinder, JsonPathInst, JsonPathValue}; @@ -1194,7 +1234,7 @@ mod tests { let query = JsonPathInst::from_str("$..book[?(@.author size 10)].title") .expect("the path is correct"); - let results = query.find_slice(&json); + let results = query.find_slice(&json, JsonPathConfig::default()); let v = results.first().expect("to get value"); // V can be implicitly converted to &Value @@ -1257,7 +1297,7 @@ mod tests { v, vec![Slice( &json!({"second":{"active": 1}}), - "$.['first']".to_string() + "$.['first']".to_string(), )] ); @@ -1271,7 +1311,7 @@ mod tests { v, vec![Slice( &json!({"second":{"active": 1}}), - "$.['first']".to_string() + "$.['first']".to_string(), )] ); @@ -1285,7 +1325,7 @@ mod tests { v, vec![Slice( &json!({"second":{"active": 1}}), - "$.['first']".to_string() + "$.['first']".to_string(), )] ); @@ -1299,7 +1339,7 @@ mod tests { v, vec![Slice( &json!({"second":{"active": 1}}), - "$.['first']".to_string() + "$.['first']".to_string(), )] ); } diff --git a/src/path/config.rs b/src/path/config.rs new file mode 100644 index 0000000..b534712 --- /dev/null +++ b/src/path/config.rs @@ -0,0 +1,16 @@ +pub mod cache; + +use crate::path::config::cache::RegexCache; + +/// Configuration to adjust the jsonpath search +#[derive(Clone, Default)] +pub struct JsonPathConfig { + /// cache to provide + pub regex_cache: RegexCache, +} + +impl JsonPathConfig { + pub fn new(regex_cache: RegexCache) -> Self { + Self { regex_cache } + } +} diff --git a/src/path/config/cache.rs b/src/path/config/cache.rs new file mode 100644 index 0000000..ebe7e23 --- /dev/null +++ b/src/path/config/cache.rs @@ -0,0 +1,115 @@ +use regex::{Error, Regex}; +use serde_json::Value; +use std::collections::HashMap; +use std::sync::{Arc, Mutex, PoisonError}; + +/// The option to provide a cache for regex +/// ``` +/// use serde_json::json; +/// use jsonpath_rust::JsonPathQuery; +/// use jsonpath_rust::path::config::cache::{DefaultRegexCacheInst, RegexCache}; +/// use jsonpath_rust::path::config::JsonPathConfig; +/// +/// let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default())); +/// let json = Box::new(json!({ +/// "author":"abcd(Rees)", +/// })); +/// +/// let _v = (json, cfg).path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]") +/// .expect("the path is correct"); +#[derive(Clone)] +pub enum RegexCache +where + T: Clone + RegexCacheInst, +{ + Absent, + Implemented(T), +} + +impl RegexCache +where + T: Clone + RegexCacheInst, +{ + pub fn is_implemented(&self) -> bool { + match self { + RegexCache::Absent => false, + RegexCache::Implemented(_) => true, + } + } + pub fn get_instance(&self) -> Result<&T, RegexCacheError> { + match self { + RegexCache::Absent => Err(RegexCacheError::new("the instance is absent".to_owned())), + RegexCache::Implemented(inst) => Ok(inst), + } + } + + pub fn instance(instance: T) -> Self { + RegexCache::Implemented(instance) + } +} +#[allow(clippy::derivable_impls)] +impl Default for RegexCache { + fn default() -> Self { + RegexCache::Absent + } +} + +/// A trait that defines the behavior for regex cache +pub trait RegexCacheInst { + fn validate(&self, regex: &str, values: Vec<&Value>) -> Result; +} + +/// Default implementation for regex cache. It uses Arc and Mutex to be capable of working +/// among the threads. +#[derive(Default, Debug, Clone)] +pub struct DefaultRegexCacheInst { + cache: Arc>>, +} + +impl RegexCacheInst for DefaultRegexCacheInst { + fn validate(&self, regex: &str, values: Vec<&Value>) -> Result { + let mut cache = self.cache.lock()?; + if cache.contains_key(regex) { + let r = cache.get(regex).unwrap(); + Ok(validate(r, values)) + } else { + let new_reg = Regex::new(regex)?; + let result = validate(&new_reg, values); + cache.insert(regex.to_owned(), new_reg); + Ok(result) + } + } +} + +fn validate(r: &Regex, values: Vec<&Value>) -> bool { + for el in values.iter() { + if let Some(v) = el.as_str() { + if r.is_match(v) { + return true; + } + } + } + false +} + +pub struct RegexCacheError { + pub reason: String, +} + +impl From for RegexCacheError { + fn from(value: Error) -> Self { + RegexCacheError::new(value.to_string()) + } +} + +impl From> for RegexCacheError { + fn from(value: PoisonError) -> Self { + RegexCacheError::new(value.to_string()) + } +} + +impl RegexCacheError { + pub fn new(reason: String) -> Self { + Self { reason } + } +} diff --git a/src/path/index.rs b/src/path/index.rs index cabf9d7..cc018f0 100644 --- a/src/path/index.rs +++ b/src/path/index.rs @@ -1,9 +1,9 @@ -use crate::jsp_idx; use crate::parser::model::{FilterExpression, FilterSign, JsonPath}; use crate::path::json::*; use crate::path::top::ObjectField; use crate::path::{json_path_instance, process_operand, JsonPathValue, Path, PathInstance}; use crate::JsonPathValue::{NoValue, Slice}; +use crate::{jsp_idx, JsonPathConfig}; use serde_json::value::Value::Array; use serde_json::Value; @@ -124,10 +124,10 @@ pub(crate) struct Current<'a> { } impl<'a> Current<'a> { - pub(crate) fn from(jp: &'a JsonPath, root: &'a Value) -> Self { + pub(crate) fn from(jp: &'a JsonPath, root: &'a Value, cfg: JsonPathConfig) -> Self { match jp { JsonPath::Empty => Current::none(), - tail => Current::new(json_path_instance(tail, root)), + tail => Current::new(json_path_instance(tail, root, cfg)), } } pub(crate) fn new(tail: PathInstance<'a>) -> Self { @@ -196,6 +196,7 @@ pub enum FilterPath<'a> { left: PathInstance<'a>, right: PathInstance<'a>, op: &'a FilterSign, + cfg: JsonPathConfig, }, Or { left: PathInstance<'a>, @@ -211,23 +212,24 @@ pub enum FilterPath<'a> { } impl<'a> FilterPath<'a> { - pub(crate) fn new(expr: &'a FilterExpression, root: &'a Value) -> Self { + pub(crate) fn new(expr: &'a FilterExpression, root: &'a Value, cfg: JsonPathConfig) -> Self { match expr { FilterExpression::Atom(left, op, right) => FilterPath::Filter { - left: process_operand(left, root), - right: process_operand(right, root), + left: process_operand(left, root, cfg.clone()), + right: process_operand(right, root, cfg.clone()), op, + cfg, }, FilterExpression::And(l, r) => FilterPath::And { - left: Box::new(FilterPath::new(l, root)), - right: Box::new(FilterPath::new(r, root)), + left: Box::new(FilterPath::new(l, root, cfg.clone())), + right: Box::new(FilterPath::new(r, root, cfg.clone())), }, FilterExpression::Or(l, r) => FilterPath::Or { - left: Box::new(FilterPath::new(l, root)), - right: Box::new(FilterPath::new(r, root)), + left: Box::new(FilterPath::new(l, root, cfg.clone())), + right: Box::new(FilterPath::new(r, root, cfg.clone())), }, FilterExpression::Not(exp) => FilterPath::Not { - exp: Box::new(FilterPath::new(exp, root)), + exp: Box::new(FilterPath::new(exp, root, cfg)), }, } } @@ -236,45 +238,48 @@ impl<'a> FilterPath<'a> { two: &'a FilterSign, left: Vec>, right: Vec>, + cfg: JsonPathConfig, ) -> bool { - FilterPath::process_atom(one, left.clone(), right.clone()) - || FilterPath::process_atom(two, left, right) + FilterPath::process_atom(one, left.clone(), right.clone(), cfg.clone()) + || FilterPath::process_atom(two, left, right, cfg) } fn process_atom( op: &'a FilterSign, left: Vec>, right: Vec>, + cfg: JsonPathConfig, ) -> bool { match op { FilterSign::Equal => eq( JsonPathValue::vec_as_data(left), JsonPathValue::vec_as_data(right), ), - FilterSign::Unequal => !FilterPath::process_atom(&FilterSign::Equal, left, right), + FilterSign::Unequal => !FilterPath::process_atom(&FilterSign::Equal, left, right, cfg), FilterSign::Less => less( JsonPathValue::vec_as_data(left), JsonPathValue::vec_as_data(right), ), FilterSign::LeOrEq => { - FilterPath::compound(&FilterSign::Less, &FilterSign::Equal, left, right) + FilterPath::compound(&FilterSign::Less, &FilterSign::Equal, left, right, cfg) } FilterSign::Greater => less( JsonPathValue::vec_as_data(right), JsonPathValue::vec_as_data(left), ), FilterSign::GrOrEq => { - FilterPath::compound(&FilterSign::Greater, &FilterSign::Equal, left, right) + FilterPath::compound(&FilterSign::Greater, &FilterSign::Equal, left, right, cfg) } FilterSign::Regex => regex( JsonPathValue::vec_as_data(left), JsonPathValue::vec_as_data(right), + &cfg.regex_cache, ), FilterSign::In => inside( JsonPathValue::vec_as_data(left), JsonPathValue::vec_as_data(right), ), - FilterSign::Nin => !FilterPath::process_atom(&FilterSign::In, left, right), - FilterSign::NoneOf => !FilterPath::process_atom(&FilterSign::AnyOf, left, right), + FilterSign::Nin => !FilterPath::process_atom(&FilterSign::In, left, right, cfg), + FilterSign::NoneOf => !FilterPath::process_atom(&FilterSign::AnyOf, left, right, cfg), FilterSign::AnyOf => any_of( JsonPathValue::vec_as_data(left), JsonPathValue::vec_as_data(right), @@ -294,10 +299,16 @@ impl<'a> FilterPath<'a> { fn process(&self, curr_el: &'a Value) -> bool { let pref = String::new(); match self { - FilterPath::Filter { left, right, op } => FilterPath::process_atom( + FilterPath::Filter { + left, + right, + op, + cfg, + } => FilterPath::process_atom( op, left.find(Slice(curr_el, pref.clone())), right.find(Slice(curr_el, pref)), + cfg.clone(), ), FilterPath::Or { left, right } => { if !JsonPathValue::vec_as_data(left.find(Slice(curr_el, pref.clone()))).is_empty() { @@ -474,7 +485,7 @@ mod tests { let chain = chain!(path!($), path!("object"), path!(@)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let res = json!({ "field_1":[1,2,3], "field_2":42, @@ -487,7 +498,7 @@ mod tests { let cur = path!(@,path!("field_3"),path!("a")); let chain = chain!(path!($), path!("object"), cur); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let res1 = json!("b"); let expected_res = vec![JsonPathValue::new_slice( @@ -507,7 +518,7 @@ mod tests { let index = path!(idx!(?filter!(op!(path!(@, path!("field"))), "exists", op!()))); let chain = chain!(path!($), path!("key"), index, path!("field")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp1 = json!([1, 2, 3, 4, 5]); let exp2 = json!(42); @@ -538,7 +549,7 @@ mod tests { let chain = chain!(path!($), path!("key"), index); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp1 = json!( {"field":10}); let exp2 = json!( {"field":5}); @@ -557,7 +568,7 @@ mod tests { idx!(?filter!(op!(path!(@, path!("field"))), ">=", op!(chain!(path!($), path!("threshold"))))) ); let chain = chain!(path!($), path!("key"), index); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let expected_res = jp_v![ &exp1;"$.['key'][1]", &exp3;"$.['key'][2]", &exp2;"$.['key'][3]"]; assert_eq!( @@ -569,7 +580,7 @@ mod tests { idx!(?filter!(op!(path!(@, path!("field"))), "<", op!(chain!(path!($), path!("threshold"))))) ); let chain = chain!(path!($), path!("key"), index); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let expected_res = jp_v![&exp4;"$.['key'][0]", &exp4;"$.['key'][4]"]; assert_eq!( path_inst.find(JsonPathValue::from_root(&json)), @@ -580,7 +591,7 @@ mod tests { idx!(?filter!(op!(path!(@, path!("field"))), "<=", op!(chain!(path!($), path!("threshold"))))) ); let chain = chain!(path!($), path!("key"), index); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let expected_res = jp_v![ &exp4;"$.['key'][0]", &exp3;"$.['key'][2]", @@ -605,7 +616,7 @@ mod tests { let index = idx!(?filter!(op!(path!(@,path!("field"))),"~=", op!("[a-zA-Z]+[0-9]#[0-9]+"))); let chain = chain!(path!($), path!("key"), path!(index)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp2 = json!( {"field":"a1#1"}); let expected_res = jp_v![&exp2;"$.['key'][1]",]; @@ -634,7 +645,7 @@ mod tests { let chain = chain!(path!($), JsonPath::Field(String::from("key")), path!(index)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp2 = json!( {"field":"a11#"}); let expected_res = jp_v![&exp2;"$.['key'][0]",]; @@ -658,7 +669,7 @@ mod tests { let index = idx!(?filter!(op!(path!(@, path!("field"))),"size",op!(4))); let chain = chain!(path!($), path!("key"), path!(index)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let f1 = json!( {"field":"aaaa"}); let f2 = json!( {"field":"dddd"}); @@ -684,7 +695,7 @@ mod tests { op!(path!(@,path!("not_id"))), "==",op!(2) )); let chain = chain!(path!($), path!("obj"), path!(index)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let js = json!({ "id":1, "not_id": 2, @@ -716,7 +727,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("city")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let a = json!("Athlon"); let d = json!("Dortmund"); let dd = json!("Dublin"); @@ -745,7 +756,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("id")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let j1 = json!(1); assert_eq!( path_inst.find(JsonPathValue::from_root(&json)), @@ -769,7 +780,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("id")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let j1 = json!(1); assert_eq!( path_inst.find(JsonPathValue::from_root(&json)), @@ -797,7 +808,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("city")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let a = json!("Athlon"); let value = jp_v!( &a;"$.['key'][4].['city']",); assert_eq!(path_inst.find(JsonPathValue::from_root(&json)), value) @@ -819,7 +830,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("id")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let j1 = json!(1); assert_eq!( path_inst.find(JsonPathValue::from_root(&json)), @@ -843,7 +854,7 @@ mod tests { ) ); let chain = chain!(path!($), path!("key"), path!(index), path!("id")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); assert_eq!( path_inst.find(JsonPathValue::from_root(&json)), vec![NoValue] diff --git a/src/path/json.rs b/src/path/json.rs index 83d9a7e..c29da5d 100644 --- a/src/path/json.rs +++ b/src/path/json.rs @@ -1,3 +1,4 @@ +use crate::path::config::cache::{RegexCache, RegexCacheInst}; use regex::Regex; use serde_json::Value; @@ -92,15 +93,24 @@ pub fn any_of(left: Vec<&Value>, right: Vec<&Value>) -> bool { false } -/// ensure that the element on the left sides mathes the regex on the right side -pub fn regex(left: Vec<&Value>, right: Vec<&Value>) -> bool { +/// ensure that the element on the left sides matches the regex on the right side +pub fn regex( + left: Vec<&Value>, + right: Vec<&Value>, + cache: &RegexCache, +) -> bool { if left.is_empty() || right.is_empty() { return false; } match right.first() { Some(Value::String(str)) => { - if let Ok(regex) = Regex::new(str) { + if cache.is_implemented() { + cache + .get_instance() + .and_then(|inst| inst.validate(str, left)) + .unwrap_or(false) + } else if let Ok(regex) = Regex::new(str) { for el in left.iter() { if let Some(v) = el.as_str() { if regex.is_match(v) { @@ -108,8 +118,10 @@ pub fn regex(left: Vec<&Value>, right: Vec<&Value>) -> bool { } } } + false + } else { + false } - false } _ => false, } @@ -170,6 +182,7 @@ pub fn eq(left: Vec<&Value>, right: Vec<&Value>) -> bool { #[cfg(test)] mod tests { + use crate::path::config::cache::RegexCache; use crate::path::json::{any_of, eq, less, regex, size, sub_set_of}; use serde_json::{json, Value}; @@ -202,12 +215,12 @@ mod tests { assert!(eq( vec![&left, &left1, &left2, &left3], - vec![&right, &right1, &right2, &right3] + vec![&right, &right1, &right2, &right3], )); assert!(!eq( vec![&left1, &left, &left2, &left3], - vec![&right, &right1, &right2, &right3] + vec![&right, &right1, &right2, &right3], )); } @@ -243,8 +256,16 @@ mod tests { let left3 = json!("a#11"); let left4 = json!("#a11"); - assert!(regex(vec![&left1, &left2, &left3, &left4], vec![&right])); - assert!(!regex(vec![&left1, &left3, &left4], vec![&right])) + assert!(regex( + vec![&left1, &left2, &left3, &left4], + vec![&right], + &RegexCache::default() + )); + assert!(!regex( + vec![&left1, &left3, &left4], + vec![&right], + &RegexCache::default() + )) } #[test] @@ -272,13 +293,13 @@ mod tests { vec![&Value::Array(vec![ left1.clone(), left2.clone(), - left3.clone() + left3.clone(), ])], - vec![&right] + vec![&right], )); assert!(!sub_set_of( vec![&Value::Array(vec![left1, left2, left3, left40])], - vec![&right] + vec![&right], )); } diff --git a/src/path/mod.rs b/src/path/mod.rs index fbe5e92..aeda6b9 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -1,10 +1,12 @@ -use crate::JsonPathValue; +use crate::{JsonPathConfig, JsonPathValue}; use serde_json::Value; use crate::parser::model::{Function, JsonPath, JsonPathIndex, Operand}; use crate::path::index::{ArrayIndex, ArraySlice, Current, FilterPath, UnionIndex}; use crate::path::top::*; +/// The module provides the ability to adjust the behavior of the search +pub mod config; /// The module is in charge of processing [[JsonPathIndex]] elements mod index; /// The module is a helper module providing the set of helping funcitons to process a json elements @@ -30,6 +32,10 @@ pub trait Path<'a> { ) -> Vec> { input.into_iter().flat_map(|d| self.find(d)).collect() } + fn cfg(&self) -> JsonPathConfig { + JsonPathConfig::default() + } + /// defines when we need to invoke `find` or `flat_find` fn needs_all(&self) -> bool { false @@ -40,36 +46,44 @@ pub trait Path<'a> { pub type PathInstance<'a> = Box + 'a>; /// The major method to process the top part of json part -pub fn json_path_instance<'a>(json_path: &'a JsonPath, root: &'a Value) -> PathInstance<'a> { +pub fn json_path_instance<'a>( + json_path: &'a JsonPath, + root: &'a Value, + cfg: JsonPathConfig, +) -> PathInstance<'a> { match json_path { JsonPath::Root => Box::new(RootPointer::new(root)), JsonPath::Field(key) => Box::new(ObjectField::new(key)), - JsonPath::Chain(chain) => Box::new(Chain::from(chain, root)), + JsonPath::Chain(chain) => Box::new(Chain::from(chain, root, cfg)), JsonPath::Wildcard => Box::new(Wildcard {}), JsonPath::Descent(key) => Box::new(DescentObject::new(key)), JsonPath::DescentW => Box::new(DescentWildcard), - JsonPath::Current(value) => Box::new(Current::from(value, root)), - JsonPath::Index(index) => process_index(index, root), + JsonPath::Current(value) => Box::new(Current::from(value, root, cfg)), + JsonPath::Index(index) => process_index(index, root, cfg), JsonPath::Empty => Box::new(IdentityPath {}), JsonPath::Fn(Function::Length) => Box::new(FnPath::Size), } } /// The method processes the indexes(all expressions indie []) -fn process_index<'a>(json_path_index: &'a JsonPathIndex, root: &'a Value) -> PathInstance<'a> { +fn process_index<'a>( + json_path_index: &'a JsonPathIndex, + root: &'a Value, + cfg: JsonPathConfig, +) -> PathInstance<'a> { match json_path_index { JsonPathIndex::Single(index) => Box::new(ArrayIndex::new(index.as_u64().unwrap() as usize)), JsonPathIndex::Slice(s, e, step) => Box::new(ArraySlice::new(*s, *e, *step)), JsonPathIndex::UnionKeys(elems) => Box::new(UnionIndex::from_keys(elems)), JsonPathIndex::UnionIndex(elems) => Box::new(UnionIndex::from_indexes(elems)), - JsonPathIndex::Filter(fe) => Box::new(FilterPath::new(fe, root)), + JsonPathIndex::Filter(fe) => Box::new(FilterPath::new(fe, root, cfg)), } } /// The method processes the operand inside the filter expressions -fn process_operand<'a>(op: &'a Operand, root: &'a Value) -> PathInstance<'a> { +fn process_operand<'a>(op: &'a Operand, root: &'a Value, cfg: JsonPathConfig) -> PathInstance<'a> { match op { - Operand::Static(v) => json_path_instance(&JsonPath::Root, v), - Operand::Dynamic(jp) => json_path_instance(jp, root), + Operand::Static(v) => json_path_instance(&JsonPath::Root, v, cfg), + Operand::Dynamic(jp) => json_path_instance(jp, root, cfg), } } diff --git a/src/path/top.rs b/src/path/top.rs index 1d3fbf9..2c185e3 100644 --- a/src/path/top.rs +++ b/src/path/top.rs @@ -1,4 +1,5 @@ use crate::parser::model::*; +use crate::path::config::JsonPathConfig; use crate::path::{json_path_instance, JsonPathValue, Path, PathInstance}; use crate::JsonPathValue::{NewValue, NoValue, Slice}; use crate::{jsp_idx, jsp_obj, JsPathStr}; @@ -156,6 +157,7 @@ impl<'a> Path<'a> for ObjectField<'a> { vec![res] } } + /// the top method of the processing ..* pub(crate) struct DescentWildcard; @@ -259,7 +261,7 @@ impl<'a> Chain<'a> { is_search_length, } } - pub fn from(chain: &'a [JsonPath], root: &'a Value) -> Self { + pub fn from(chain: &'a [JsonPath], root: &'a Value, cfg: JsonPathConfig) -> Self { let chain_len = chain.len(); let is_search_length = if chain_len > 2 { let mut res = false; @@ -299,7 +301,10 @@ impl<'a> Chain<'a> { }; Chain::new( - chain.iter().map(|p| json_path_instance(p, root)).collect(), + chain + .iter() + .map(|p| json_path_instance(p, root, cfg.clone())) + .collect(), is_search_length, ) } @@ -368,17 +373,17 @@ mod tests { let field4 = path!("array"); let field5 = path!("object"); - let path_inst = json_path_instance(&path!($), &json); + let path_inst = json_path_instance(&path!($), &json, Default::default()); assert_eq!(path_inst.find(jp_v!(&json)), jp_v!(&json;"$",)); - let path_inst = json_path_instance(&field1, &json); + let path_inst = json_path_instance(&field1, &json, Default::default()); let exp_json = json!({"k":{"f":42,"array":[0,1,2,3,4,5],"object":{"field1":"val1","field2":"val2"}}}); assert_eq!(path_inst.find(jp_v!(&json)), jp_v!(&exp_json;".['v']",)); let chain = chain!(path!($), field1.clone(), field2.clone(), field3); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp_json = json!(42); assert_eq!( path_inst.find(jp_v!(&json)), @@ -392,7 +397,7 @@ mod tests { field4.clone(), path!(idx!(3)) ); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let exp_json = json!(3); assert_eq!( path_inst.find(jp_v!(&json)), @@ -407,7 +412,7 @@ mod tests { field4.clone(), path!(index) ); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let one = json!(1); let tree = json!(3); assert_eq!( @@ -423,7 +428,7 @@ mod tests { field4, path!(union) ); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let tree = json!(1); let two = json!(2); assert_eq!( @@ -433,7 +438,7 @@ mod tests { let union = idx!("field1", "field2"); let chain = chain!(path!($), field1.clone(), field2, field5, path!(union)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let one = json!("val1"); let two = json!("val2"); assert_eq!( @@ -443,16 +448,18 @@ mod tests { &two;"$.['v'].['k'].['object'].['field2']") ); } + #[test] fn path_descent_arr_test() { let json = json!([{"a":1}]); let chain = chain!(path!($), path!(.."a")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let one = json!(1); let expected_res = jp_v!(&one;"$[0].['a']",); assert_eq!(path_inst.find(jp_v!(&json)), expected_res) } + #[test] fn deep_path_test() { let value = json!([1]); @@ -467,7 +474,7 @@ mod tests { "key1": [1] }); let chain = chain!(path!($), path!(..*)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let arr = json!([1]); let one = json!(1); @@ -475,6 +482,7 @@ mod tests { let expected_res = jp_v!(&arr;"$.['key1']",&one;"$.['key1'][0]"); assert_eq!(path_inst.find(jp_v!(&json)), expected_res) } + #[test] fn path_descent_w_nested_array_test() { let json = json!( @@ -482,7 +490,7 @@ mod tests { "key2" : [{"a":1},{}] }); let chain = chain!(path!($), path!(..*)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let arr2 = json!([{"a": 1},{}]); let obj = json!({"a": 1}); @@ -515,7 +523,7 @@ mod tests { } }); let chain = chain!(path!($), path!(..*)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let key1 = json!([1]); let one = json!(1); @@ -552,6 +560,7 @@ mod tests { ]; assert_eq!(path_inst.find(jp_v!(&json)), expected_res) } + #[test] fn path_descent_test() { let json = json!( @@ -568,7 +577,7 @@ mod tests { } }); let chain = chain!(path!($), path!(.."key1")); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let res1 = json!([1, 2, 3]); let res2 = json!("key1"); @@ -593,7 +602,7 @@ mod tests { }); let chain = chain!(path!($), path!(*)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); let res1 = json!([1, 2, 3]); let res2 = json!("key"); @@ -612,7 +621,7 @@ mod tests { }); let chain = chain!(path!($), path!(*), function!(length)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); assert_eq!( path_inst.flat_find(vec![jp_v!(&json)], true), @@ -620,7 +629,7 @@ mod tests { ); let chain = chain!(path!($), path!("key1"), function!(length)); - let path_inst = json_path_instance(&chain, &json); + let path_inst = json_path_instance(&chain, &json, Default::default()); assert_eq!( path_inst.flat_find(vec![jp_v!(&json)], false), vec![jp_v!(json!(3))]