Skip to content

Commit

Permalink
feat(restricted_glob): negated globs (#4377)
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos authored Oct 25, 2024
1 parent 53e2e86 commit 28a2a6c
Showing 1 changed file with 128 additions and 21 deletions.
149 changes: 128 additions & 21 deletions crates/biome_js_analyze/src/utils/restricted_glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,56 @@ use biome_rowan::{TextRange, TextSize};
/// - Use `\*` to escape `*`
/// - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`.
/// These characters are reserved for future use.
/// - `!` must be escaped if it is the first character of the pattern
/// - Use `!` as first character to negate the glob
///
/// A path segment is delimited by path separator `/` or the start/end of the path.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(try_from = "String", into = "String")]
pub struct RestrictedGlob(globset::GlobMatcher);
pub struct RestrictedGlob {
is_negated: bool,
glob: globset::GlobMatcher,
}
impl RestrictedGlob {
/// Tests whether the given path matches this pattern or not.
/// Returns `true` if this glob is negated.
///
/// ```
/// use biome_js_analyze::utils::restricted_glob::RestrictedGlob;
///
/// let glob = "!*.js".parse::<RestrictedGlob>().unwrap();
/// assert!(glob.is_negated());
///
/// let glob = "*.js".parse::<RestrictedGlob>().unwrap();
/// assert!(!glob.is_negated());
/// ```
pub fn is_negated(&self) -> bool {
self.is_negated
}

/// Tests whether the given path matches this pattern.
pub fn is_match(&self, path: impl AsRef<std::path::Path>) -> bool {
self.0.is_match(path)
self.is_raw_match(path) != self.is_negated
}

/// Tests whether the given path matches this pattern or not.
/// Checks `path` against the underlying glob only.
///
/// The negation flag is deliberately not applied: the result says whether the
/// pattern itself matches, regardless of a leading `!`.
fn is_raw_match(&self, path: impl AsRef<std::path::Path>) -> bool {
    let path = path.as_ref();
    self.glob.is_match(path)
}

/// Tests whether the given path matches this pattern.
pub fn is_match_candidate(&self, path: &CandidatePath<'_>) -> bool {
self.0.is_match_candidate(&path.0)
self.is_raw_match_candidate(path) != self.is_negated
}

/// Tests whether the given path matches this pattern, ignoring the negation.
fn is_raw_match_candidate(&self, path: &CandidatePath<'_>) -> bool {
self.glob.is_match_candidate(&path.0)
}
}
impl std::fmt::Display for RestrictedGlob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let repr = self.0.glob().to_string();
f.write_str(&repr)
let repr = self.glob.glob().to_string();
let negation = if self.is_negated { "!" } else { "" };
write!(f, "{negation}{repr}")
}
}
impl From<RestrictedGlob> for String {
Expand All @@ -38,14 +67,22 @@ impl From<RestrictedGlob> for String {
impl std::str::FromStr for RestrictedGlob {
type Err = RestrictedGlobError;
fn from_str(value: &str) -> Result<Self, Self::Err> {
let (is_negated, value) = if let Some(stripped) = value.strip_prefix('!') {
(true, stripped)
} else {
(false, value)
};
validate_restricted_glob(value)?;
let mut glob_builder = globset::GlobBuilder::new(value);
// Allow escaping with `\` on all platforms.
glob_builder.backslash_escape(true);
// Only `**` can match `/`
glob_builder.literal_separator(true);
match glob_builder.build() {
Ok(glob) => Ok(RestrictedGlob(glob.compile_matcher())),
Ok(glob) => Ok(RestrictedGlob {
is_negated,
glob: glob.compile_matcher(),
}),
Err(error) => Err(RestrictedGlobError::Generic(
error.kind().to_string().into_boxed_str(),
)),
Expand Down Expand Up @@ -98,12 +135,59 @@ impl schemars::JsonSchema for RestrictedGlob {
/// Constructing candidates has a very small cost associated with it, so
/// callers may find it beneficial to amortize that cost when matching a single
/// path against multiple globs or sets of globs.
#[derive(Debug, Clone)]
pub struct CandidatePath<'a>(globset::Candidate<'a>);
impl<'a> CandidatePath<'a> {
/// Create a new candidate for matching from the given path.
pub fn new(path: &'a impl AsRef<std::path::Path>) -> Self {
Self(globset::Candidate::new(path))
}

/// Tests whether the current path matches `glob`.
pub fn matches(&self, glob: &RestrictedGlob) -> bool {
glob.is_match_candidate(self)
}

/// Match against a list of globs where negated globs are handled as exceptions.
///
/// Let's take an example:
///
/// ```
/// use biome_js_analyze::utils::restricted_glob::{CandidatePath, RestrictedGlob};
///
/// let globs: &[RestrictedGlob] = &[
/// "*".parse().unwrap(),
/// "!a*".parse().unwrap(),
/// "a".parse().unwrap(),
/// ];
///
/// assert!(CandidatePath::new(&"b").matches_with_exceptions(globs));
/// assert!(CandidatePath::new(&"a").matches_with_exceptions(globs));
///
/// assert!(!CandidatePath::new(&"abc").matches_with_exceptions(globs));
/// ```
///
/// - `b` matches `*` and is not excluded by the exception `!a*`.
/// Thus, `b` matches the list of globs.
/// - `abc` matches the first glob `*`, however it is excluded by the exception `!a*`.
/// Thus `abc` doesn't match the list of globs.
/// - `a` matches the first glob `*` and is excluded by the exception `!a*`.
/// However, it is included again by the last glob `a`.
/// Thus `a` matches the list of globs.
///
pub fn matches_with_exceptions<'b, I>(&self, globs: I) -> bool
where
I: IntoIterator<Item = &'b RestrictedGlob>,
I::IntoIter: DoubleEndedIterator,
{
// Iterate in reverse order to avoid unnecessary glob matching.
for glob in globs.into_iter().rev() {
if glob.is_raw_match_candidate(self) {
return !glob.is_negated();
}
}
false
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -144,7 +228,6 @@ pub enum RestrictedGlobErrorKind {
UnsupportedAlternates,
UnsupportedCharacterClass,
UnsupportedAnyCharacter,
UnsupportedNegation,
}
impl std::fmt::Display for RestrictedGlobErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Expand All @@ -162,9 +245,6 @@ impl std::fmt::Display for RestrictedGlobErrorKind {
Self::UnsupportedAnyCharacter => {
r"`?` matcher is not supported. Use `\?` to escape the character."
}
Self::UnsupportedNegation => {
r"Negated globs `!` are not supported. Use `\!` to escape the character."
}
};
write!(f, "{desc}")
}
Expand All @@ -175,12 +255,6 @@ fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> {
let mut it = pattern.bytes().enumerate();
while let Some((i, c)) = it.next() {
match c {
b'!' if i == 0 => {
return Err(RestrictedGlobError::Regular {
kind: RestrictedGlobErrorKind::UnsupportedNegation,
index: i as u32,
});
}
b'\\' => {
// Accept a restrictive set of escape sequence
if let Some((j, c)) = it.next() {
Expand Down Expand Up @@ -227,19 +301,21 @@ fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> {

#[cfg(test)]
mod tests {
use std::str::FromStr;

use super::*;

#[test]
fn test_validate_restricted_glob() {
assert!(validate_restricted_glob("!*.js").is_err());
assert!(validate_restricted_glob("*.[jt]s").is_err());
assert!(validate_restricted_glob("*.{js,ts}").is_err());
assert!(validate_restricted_glob("?*.js").is_err());
assert!(validate_restricted_glob(r"\").is_err());
assert!(validate_restricted_glob(r"\n").is_err());
assert!(validate_restricted_glob(r"\😀").is_err());
assert!(validate_restricted_glob("!").is_err());

assert!(validate_restricted_glob("!*.js").is_ok());
assert!(validate_restricted_glob("!").is_ok());
assert!(validate_restricted_glob("*.js").is_ok());
assert!(validate_restricted_glob("**/*.js").is_ok());
assert!(validate_restricted_glob(r"\*").is_ok());
Expand All @@ -258,4 +334,35 @@ mod tests {
.unwrap()
.is_match("file/path.js"));
}

#[test]
fn test_match_with_exceptions() {
    // Parses a pattern that is known to be valid.
    let glob = |pattern: &str| RestrictedGlob::from_str(pattern).unwrap();
    let candidate = CandidatePath::new(&"a");

    // An exception that doesn't apply to the path leaves the earlier match intact.
    let unrelated_exception = [glob("*"), glob("!b")];
    assert!(candidate.matches_with_exceptions(&unrelated_exception));

    // An exception that applies to the path excludes it.
    let excluded = [glob("*"), glob("!a*")];
    assert!(!candidate.matches_with_exceptions(&excluded));

    // A later non-negated glob includes the path again.
    let included_again = [glob("*"), glob("!a*"), glob("a")];
    assert!(candidate.matches_with_exceptions(&included_again));
}

#[test]
fn test_to_string() {
    // Both a plain and a negated pattern must print back exactly as written.
    for pattern in ["**/*.js", "!**/*.js"] {
        let glob = RestrictedGlob::from_str(pattern).unwrap();
        assert_eq!(glob.to_string(), pattern);
    }
}
}

0 comments on commit 28a2a6c

Please sign in to comment.