Skip to content

Commit

Permalink
tackler-rs: full haystack regex matcher
Browse files Browse the repository at this point in the history
GH-31

Signed-off-by: 35V LG84 <35vlg84-x4e6b92@e257.fi>
  • Loading branch information
35VLG84 committed Dec 22, 2024
1 parent a0d4954 commit 1f5a950
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 0 deletions.
24 changes: 24 additions & 0 deletions tackler-rs/CRATES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,30 @@ The Rusty Services are assorted bits and pieces which are needed for
Tackler, but won't fit into the domain of plain text accounting.


## Full haystack regex matchers

By default, Rust's `regex::Regex::is_match` tests whether the regex matches [anywhere in the given haystack](https://docs.rs/regex/latest/regex/struct.Regex.html#method.is_match).

These constructors create a regex which will try to match against the full haystack by default. This logic is similar to [java.util.regex.Matcher.matches()](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/regex/Matcher.html#matches())

```rust
tackler_rs::regex::{
new_full_haystack_regex,
new_full_haystack_regex_set,
peeled_pattern,
peeled_patterns
}
```

### Serializers and Deserializers for full haystack matchers

This is a Serde serializer and deserializer implementation for full haystack matchers.

```rust
tackler_rs::regex::serde::full_haystack_matcher
```


## Tackler components on Crates.io

* Tackler CLI application: [tackler](https://crates.io/crates/tackler)
Expand Down
3 changes: 3 additions & 0 deletions tackler-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ use std::io::BufWriter;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};

/// Regex helpers to have full haystack matcher (JDK matches())
pub mod regex;

///
/// Get full path based on
/// directory, filename prefix, filename and extension
Expand Down
202 changes: 202 additions & 0 deletions tackler-rs/src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,205 @@
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
* OR OTHER DEALINGS IN THE SOFTWARE.
*/

/// Serialization and Deserialization for full haystack regex matchers
pub mod serde;

use regex::{Regex, RegexSet};

/// Wraps `re` into an anchored, non-capturing group: `^(?:re)$`.
///
/// The group keeps alternations inside `re` (e.g. `a|b`) under both anchors.
fn into_full_haystack_pattern<S>(re: S) -> String
where
    S: AsRef<str>,
{
    ["^(?:", re.as_ref(), ")$"].concat()
}

/// Removes one layer of the `^(?:` ... `)$` wrapping from `re`.
///
/// The input is returned unchanged unless it carries both the prefix
/// and the suffix.
fn peel_full_haystack_pattern(re: &str) -> &str {
    re.strip_prefix("^(?:")
        .and_then(|rest| rest.strip_suffix(")$"))
        .unwrap_or(re)
}

/// Compiles a full haystack regular expression
///
/// This will augment (anchor) the given `re` as `^(?:re)$` so that it
/// will match against the full haystack.
///
/// See `regex::Regex::new` for actual documentation of this method.
///
/// See `peeled_pattern` how to get back the original string
///
/// # Errors
///
/// Returns the `regex::Error` reported by `regex::Regex::new` when the
/// anchored pattern can not be compiled.
///
/// # Examples
/// ```rust
/// # use std::error::Error;
/// use tackler_rs::regex::new_full_haystack_regex;
///
/// let re_foo = new_full_haystack_regex("foo")?;
/// let re_bar = new_full_haystack_regex("bar")?;
///
/// assert!(re_foo.is_match("foo"));
/// assert!(re_bar.is_match("bar"));
///
/// assert!(!re_foo.is_match("foobar"));
/// assert!(!re_bar.is_match("foobar"));
/// # Ok::<(), Box<dyn Error>>(())
/// ```
pub fn new_full_haystack_regex(re: &str) -> Result<Regex, regex::Error> {
    Regex::new(&into_full_haystack_pattern(re))
}

/// Returns the pattern of this regex without the full haystack anchoring.
///
/// For a regex created with `new_full_haystack_regex` this is the
/// original string given to the constructor.
///
/// # Examples
/// ```rust
/// # use std::error::Error;
/// use tackler_rs::regex::new_full_haystack_regex;
/// use tackler_rs::regex::peeled_pattern;
///
/// let re_foo = new_full_haystack_regex(r"foo.*")?;
///
/// assert_eq!(peeled_pattern(&re_foo), r"foo.*");
/// # Ok::<(), Box<dyn Error>>(())
/// ```
pub fn peeled_pattern(regex: &Regex) -> &str {
    let anchored = regex.as_str();
    peel_full_haystack_pattern(anchored)
}

/// Compiles a set of full haystack regular expressions
///
/// This will augment (anchor) the given expressions so
/// that each of those will match against full haystack.
///
/// See `Regex::RegexSet::new` for actual documentation of this method.
///
/// See `peeled_pattern` how to get back the original string
///
/// # Examples
/// ```rust
/// # use std::error::Error;
/// use tackler_rs::regex::new_full_haystack_regex_set;
///
/// let re_set = new_full_haystack_regex_set(["foo", "bar"])?;
///
/// assert!(re_set.is_match("foo"));
/// assert!(re_set.is_match("bar"));
///
/// assert!(!re_set.is_match("foobar"));
/// assert!(!re_set.is_match("foobar"));
/// # Ok::<(), Box<dyn Error>>(())
/// ```
pub fn new_full_haystack_regex_set<I, S>(exprs: I) -> Result<RegexSet, regex::Error>
where
S: AsRef<str>,
I: IntoIterator<Item = S>,
{
RegexSet::new(exprs.into_iter().map(|re| into_full_haystack_pattern(re)))
}

/// Returns the patterns of this regex set without the full haystack anchoring.
///
/// The patterns are returned in the same order as `RegexSet::patterns`
/// reports them.
///
/// # Examples
/// ```rust
/// # use std::error::Error;
/// use tackler_rs::regex::new_full_haystack_regex_set;
/// use tackler_rs::regex::peeled_patterns;
///
/// let re_set = new_full_haystack_regex_set(["foo", "bar"])?;
///
/// assert_eq!(peeled_patterns(&re_set), vec!["foo", "bar"]);
/// # Ok::<(), Box<dyn Error>>(())
/// ```
pub fn peeled_patterns(regex_set: &RegexSet) -> Vec<String> {
    let anchored = regex_set.patterns();
    let mut peeled = Vec::with_capacity(anchored.len());
    for pattern in anchored {
        peeled.push(peel_full_haystack_pattern(pattern).to_owned());
    }
    peeled
}

// Unit tests for the full haystack anchoring and peeling helpers.
#[cfg(test)]
mod tests {
use super::*;

// Only strings carrying both the `^(?:` prefix and the `)$` suffix are
// peeled; everything else must come back unchanged.
#[test]
fn test_peel_full_haystack_pattern() {
assert_eq!(peel_full_haystack_pattern("abc"), "abc");
assert_eq!(peel_full_haystack_pattern(".*"), ".*");
assert_eq!(peel_full_haystack_pattern("(.*)"), "(.*)");
assert_eq!(peel_full_haystack_pattern("^(?:.*)"), "^(?:.*)");
assert_eq!(peel_full_haystack_pattern("(.*)$"), "(.*)$");
assert_eq!(peel_full_haystack_pattern("^(?:.*)$"), ".*");
}

// A plain pattern is anchored and matches only the whole haystack.
#[test]
fn test_full_haystack_pattern() {
let re = new_full_haystack_regex(r"o.a").unwrap(/*:test:*/);
assert_eq!(re.as_str(), r"^(?:o.a)$");

assert!(!re.is_match("foobar"));
assert!(!re.is_match("ooba"));
assert!(!re.is_match("obar"));
assert!(re.is_match("oba"));
}

// A pattern which already has its own anchors is wrapped once more and
// matches the same haystacks.
#[test]
fn test_full_haystack_pattern_anchored() {
let re = new_full_haystack_regex(r"^o.a$").unwrap(/*:test:*/);
assert_eq!(re.as_str(), r"^(?:^o.a$)$");

assert!(!re.is_match("foobar"));
assert!(!re.is_match("ooba"));
assert!(!re.is_match("obar"));
assert!(re.is_match("oba"));
}

// A pattern which itself looks like an anchored one gets a second layer,
// and peeling removes only that outer layer.
#[test]
fn test_full_haystack_pattern_peeled() {
let re_str = r"^(?:o.a)$";
let re = new_full_haystack_regex(re_str).unwrap(/*:test:*/);
assert_eq!(re.as_str(), r"^(?:^(?:o.a)$)$");

assert!(!re.is_match("foobar"));
assert!(!re.is_match("ooba"));
assert!(!re.is_match("obar"));
assert!(re.is_match("oba"));

assert_eq!(peeled_pattern(&re), re_str);
}

// Every member of a regex set is anchored individually.
#[test]
fn test_full_haystack_patterns() {
let re_set = new_full_haystack_regex_set([r".*foo", r"bar.*"]).unwrap(/*:test:*/);
assert_eq!(re_set.patterns(), [r"^(?:.*foo)$", r"^(?:bar.*)$"]);

assert!(!re_set.is_match("foobar"));
assert!(re_set.is_match("foo"));
assert!(re_set.is_match("bar"));
}

// Set members with their own anchors are wrapped once more and match the
// same haystacks.
#[test]
fn test_full_haystack_patterns_anchored() {
let re_set = new_full_haystack_regex_set([r"^.*foo$", r"^bar.*$"]).unwrap(/*:test:*/);
assert_eq!(re_set.patterns(), [r"^(?:^.*foo$)$", r"^(?:^bar.*$)$"]);

assert!(!re_set.is_match("foobar"));
assert!(re_set.is_match("foo"));
assert!(re_set.is_match("bar"));
}

// Peeling a set removes only the outer layer of each member.
#[test]
fn test_full_haystack_patterns_peeled() {
let re_set_str = [r"^(?:.*foo)$", r"^(?:bar.*)$"];
let re_set = new_full_haystack_regex_set(re_set_str).unwrap(/*:test:*/);
assert_eq!(
re_set.patterns(),
[r"^(?:^(?:.*foo)$)$", r"^(?:^(?:bar.*)$)$"]
);

assert!(!re_set.is_match("foobar"));
assert!(re_set.is_match("foo"));
assert!(re_set.is_match("bar"));

assert_eq!(peeled_patterns(&re_set), re_set_str);
}
}
19 changes: 19 additions & 0 deletions tackler-rs/src/regex/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,22 @@
* OR OTHER DEALINGS IN THE SOFTWARE.
*/

/// Full Haystack matcher serializer and deserializer
///
/// Use this module with `#[serde(with = "...")]` on a `Regex` field.
/// When deserialized, the pattern string is compiled as a full haystack
/// regex. When serialized, the peeled pattern (without the full haystack
/// anchoring) is written out.
///
/// # Example
///
/// ```rust
/// use regex::Regex;
/// use serde::{Deserialize, Serialize};
/// use tackler_rs::regex::serde::full_haystack_matcher;
///
/// #[derive(Serialize, Deserialize)]
/// struct Account {
/// #[serde(with = "full_haystack_matcher")]
/// regex: Regex,
/// }
///
/// #
/// # fn main() {}
/// ```
pub mod full_haystack_matcher;
116 changes: 116 additions & 0 deletions tackler-rs/src/regex/serde/full_haystack_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,119 @@
* OR OTHER DEALINGS IN THE SOFTWARE.
*/

//
// This code is based on: https://github.com/tailhook/serde-regex,
// which is licensed as Apache-2.0 OR MIT
//

use regex::Regex;
use std::{
borrow::Cow,
hash::Hash,
ops::{Deref, DerefMut},
};

use crate::regex::{new_full_haystack_regex, peeled_pattern};
use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};

/// A wrapper type which implements `Serialize` and `Deserialize` for
/// types involving `Regex`
///
/// Deserializing `Serde<Regex>` compiles the incoming string as a full
/// haystack regex; serializing writes out the peeled pattern.
///
/// The wrapped value is the public field `0`; it is also reachable through
/// `Deref`, `DerefMut` and `into_inner`.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct Serde<T>(pub T);

impl<'de> Deserialize<'de> for Serde<Regex> {
fn deserialize<D>(d: D) -> Result<Serde<Regex>, D::Error>
where
D: Deserializer<'de>,
{
let s = <Cow<str>>::deserialize(d)?;

match new_full_haystack_regex(s.as_ref()) {
Ok(regex) => Ok(Serde(regex)),
Err(err) => Err(D::Error::custom(err)),
}
}
}

/// Deserialize function for `#[serde(with = "...")]` usage,
/// see crate docs to see how to use it
///
/// Deserializes the `Serde` wrapper and hands back the wrapped value.
pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
    D: Deserializer<'de>,
    Serde<T>: Deserialize<'de>,
{
    let wrapped = Serde::<T>::deserialize(deserializer)?;
    Ok(wrapped.0)
}

/// Serialize function for `#[serde(with = "...")]` usage,
/// see crate docs to see how to use it
///
/// Wraps the borrowed value into `Serde` and serializes the wrapper.
pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
    for<'a> Serde<&'a T>: Serialize,
{
    let wrapped = Serde(value);
    wrapped.serialize(serializer)
}

impl<T> Deref for Serde<T> {
type Target = T;

fn deref(&self) -> &T {
&self.0
}
}

impl<T> DerefMut for Serde<T> {
fn deref_mut(&mut self) -> &mut T {
&mut self.0
}
}

impl<T> Serde<T> {
/// Consumes the `Serde`, returning the inner value.
pub fn into_inner(self) -> T {
self.0
}
}

impl<T> From<T> for Serde<T> {
fn from(val: T) -> Serde<T> {
Serde(val)
}
}

impl Serialize for Serde<&Regex> {
    /// Serializes the peeled pattern of the borrowed regex as a string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let pattern = peeled_pattern(self.0);
        serializer.serialize_str(pattern)
    }
}

impl Serialize for Serde<Regex> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
peeled_pattern(&self.0).serialize(serializer)
}
}

// Round-trip test for the full haystack matcher (de)serializer.
#[cfg(test)]
mod test {
use super::*;

use crate::regex::into_full_haystack_pattern;
use regex::Regex;
use serde_json::{from_str, to_string};

// SAMPLE is the plain regex pattern, SAMPLE_JSON is the same pattern
// encoded as a JSON string (quotes and backslashes escaped).
const SAMPLE: &str = r#"[a-z"\]]+\d{1,10}""#;
const SAMPLE_JSON: &str = r#""[a-z\"\\]]+\\d{1,10}\"""#;

#[test]
fn test_regex() {
let re: Serde<Regex> = from_str(SAMPLE_JSON).unwrap();

// Deserialized regex carries the full haystack anchoring ...
assert_eq!(re.as_str(), into_full_haystack_pattern(SAMPLE));
// ... and serialization writes the peeled pattern back.
assert_eq!(to_string(&re).unwrap(), SAMPLE_JSON);
}
}

0 comments on commit 1f5a950

Please sign in to comment.