Skip to content

Commit

Permalink
Switch from regex crate to regex-lite
Browse files Browse the repository at this point in the history
  • Loading branch information
Dav1dde committed Aug 12, 2024
1 parent 40e44d1 commit fa6bd76
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 49 deletions.
2 changes: 1 addition & 1 deletion regex-filtered/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repository = "https://github.com/ua-parser/uap-rust/"
[dependencies]
aho-corasick = "1.1.3"
itertools = "0.13.0"
regex = "1.10.4"
regex-lite = "0.1"
regex-syntax = "0.8.3"

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion regex-filtered/benches/regex.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use criterion::{criterion_group, criterion_main, Criterion};

use regex::Regex;
use regex_lite::Regex;

/// On this trivial synthetic test, the results on an M1P are:
///
Expand Down
27 changes: 10 additions & 17 deletions regex-filtered/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub use model::Error as ModelError;

/// Builder for the regexes set
pub struct Builder {
regexes: Vec<regex::Regex>,
regexes: Vec<regex_lite::Regex>,
mapper_builder: mapper::Builder,
}

Expand Down Expand Up @@ -69,8 +69,8 @@ impl Options {
self.crlf = yes;
self
}
fn to_regex(&self, pattern: &str) -> Result<regex::Regex, regex::Error> {
regex::RegexBuilder::new(pattern)
fn to_regex(&self, pattern: &str) -> Result<regex_lite::Regex, regex_lite::Error> {
regex_lite::RegexBuilder::new(pattern)
.case_insensitive(self.case_insensitive)
.dot_matches_new_line(self.dot_matches_new_line)
.ignore_whitespace(self.ignore_whitespace)
Expand Down Expand Up @@ -113,16 +113,12 @@ pub enum ParseError {
/// An error occurred while processing the regex for atom
/// extraction.
ProcessingError(ModelError),
/// The regex was too large to compile to the NFA (within the
/// default limits).
RegexTooLarge(usize),
}
impl std::error::Error for ParseError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
ParseError::ProcessingError(e) => Some(e),
ParseError::SyntaxError(_) => None,
ParseError::RegexTooLarge(_) => None,
}
}
}
Expand All @@ -136,12 +132,9 @@ impl From<regex_syntax::Error> for ParseError {
Self::SyntaxError(value.to_string())
}
}
impl From<regex::Error> for ParseError {
fn from(value: regex::Error) -> Self {
match value {
regex::Error::CompiledTooBig(v) => Self::RegexTooLarge(v),
e => Self::SyntaxError(e.to_string()),
}
/// Converts a [`regex_lite::Error`] into a [`ParseError::SyntaxError`]
/// carrying the error's `to_string()` rendering.
impl From<regex_lite::Error> for ParseError {
    fn from(value: regex_lite::Error) -> Self {
        // Only the textual form of the error is retained.
        let message = value.to_string();
        Self::SyntaxError(message)
    }
}
impl From<ModelError> for ParseError {
Expand Down Expand Up @@ -196,7 +189,7 @@ impl Builder {
}

/// Currently loaded regexes.
pub fn regexes(&self) -> &[regex::Regex] {
pub fn regexes(&self) -> &[regex_lite::Regex] {
&self.regexes
}

Expand Down Expand Up @@ -262,7 +255,7 @@ impl Default for Builder {
/// Regexes set, allows testing inputs against a *large* number of
/// *non-trivial* regexes.
pub struct Regexes {
regexes: Vec<regex::Regex>,
regexes: Vec<regex_lite::Regex>,
mapper: mapper::Mapper,
prefilter: AhoCorasick,
}
Expand Down Expand Up @@ -299,15 +292,15 @@ impl Regexes {
pub fn matching<'a>(
&'a self,
haystack: &'a str,
) -> impl Iterator<Item = (usize, &regex::Regex)> + 'a {
) -> impl Iterator<Item = (usize, &regex_lite::Regex)> + 'a {
self.prefiltered(haystack).filter_map(move |idx| {
let r = &self.regexes[idx];
r.is_match(haystack).then_some((idx, r))
})
}

/// Returns a reference to all the regexes in the set.
pub fn regexes(&self) -> &[regex::Regex] {
pub fn regexes(&self) -> &[regex_lite::Regex] {
&self.regexes
}
}
Expand Down
2 changes: 1 addition & 1 deletion ua-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repository = "https://github.com/ua-parser/uap-rust/"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
regex = "1.10.4"
regex-lite = "0.1"
regex-filtered = { version = "0.2.0", path = "../regex-filtered" }
serde = { version = "1.0.203", features = ["derive"] }

Expand Down
35 changes: 7 additions & 28 deletions ua-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#![allow(clippy::empty_docs)]
#![doc = include_str!("../README.md")]

use regex::Captures;
use regex_lite::Captures;
use serde::Deserialize;

pub use regex_filtered::{BuildError, ParseError};
Expand Down Expand Up @@ -666,29 +666,6 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
']' if !escape => {
inclass += 1;
}
// no need for special cases because regex allows nesting
// character classes, whereas js or python don't \o/
'd' if escape => {
// idx is d so idx-1 is \\, and we want to exclude it
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[0-9]");
}
'D' if escape => {
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[^0-9]");
}
'w' if escape => {
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[A-Za-z0-9_]");
}
'W' if escape => {
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[^A-Za-z0-9_]");
}
_ => (),
}
escape = false;
Expand Down Expand Up @@ -736,9 +713,11 @@ mod test_rewrite_regex {
}

#[test]
fn rewrite_classes() {
assert_eq!(rewrite(r"\dx"), "[0-9]x");
assert_eq!(rewrite(r"\wx"), "[A-Za-z0-9_]x");
assert_eq!(rewrite(r"[\d]x"), r"[[0-9]]x");
fn dont_rewrite_classes() {
assert_eq!(rewrite(r"\dx"), r"\dx");
assert_eq!(rewrite(r"\wx"), r"\wx");
assert_eq!(rewrite(r"[\d]x"), r"[\d]x");
assert_eq!(rewrite(r"[\{}]x"), r"[\{}]x");
assert_eq!(rewrite(r"\{\}x"), r"\{\}x");
}
}
2 changes: 1 addition & 1 deletion ua-parser/src/resolvers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// required, if any group is optional that returns `None`.

use crate::Error;
use regex::Captures;
use regex_lite::Captures;
use std::borrow::Cow;

fn get<'s>(c: &Captures<'s>, group: usize) -> Option<&'s str> {
Expand Down

0 comments on commit fa6bd76

Please sign in to comment.