From 28a2a6c7369a9106e40520ad2004d3786dd1f280 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Fri, 25 Oct 2024 14:11:11 +0200 Subject: [PATCH] feat(restricted_glob): negated globs (#4377) --- .../src/utils/restricted_glob.rs | 149 +++++++++++++++--- 1 file changed, 128 insertions(+), 21 deletions(-) diff --git a/crates/biome_js_analyze/src/utils/restricted_glob.rs b/crates/biome_js_analyze/src/utils/restricted_glob.rs index 677aea541483..78a5d132b695 100644 --- a/crates/biome_js_analyze/src/utils/restricted_glob.rs +++ b/crates/biome_js_analyze/src/utils/restricted_glob.rs @@ -7,27 +7,56 @@ use biome_rowan::{TextRange, TextSize}; /// - Use `\*` to escape `*` /// - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`. /// These characters are reserved for future use. -/// - `!` must be escaped if it is the first character of the pattern +/// - Use `!` as first character to negate the glob /// /// A path segment is delimited by path separator `/` or the start/end of the path. #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] #[serde(try_from = "String", into = "String")] -pub struct RestrictedGlob(globset::GlobMatcher); +pub struct RestrictedGlob { + is_negated: bool, + glob: globset::GlobMatcher, +} impl RestrictedGlob { - /// Tests whether the given path matches this pattern or not. + /// Returns `true` if this glob is negated. + /// + /// ``` + /// use biome_js_analyze::utils::restricted_glob::RestrictedGlob; + /// + /// let glob = "!*.js".parse::<RestrictedGlob>().unwrap(); + /// assert!(glob.is_negated()); + /// + /// let glob = "*.js".parse::<RestrictedGlob>().unwrap(); + /// assert!(!glob.is_negated()); + /// ``` + pub fn is_negated(&self) -> bool { + self.is_negated + } + + /// Tests whether the given path matches this pattern. pub fn is_match(&self, path: impl AsRef<std::path::Path>) -> bool { - self.0.is_match(path) + self.is_raw_match(path) != self.is_negated } - /// Tests whether the given path matches this pattern or not. 
+ /// Tests whether the given path matches this pattern, ignoring the negation. + fn is_raw_match(&self, path: impl AsRef<std::path::Path>) -> bool { + self.glob.is_match(path) + } + + /// Tests whether the given path matches this pattern. pub fn is_match_candidate(&self, path: &CandidatePath<'_>) -> bool { - self.0.is_match_candidate(&path.0) + self.is_raw_match_candidate(path) != self.is_negated + } + + /// Tests whether the given path matches this pattern, ignoring the negation. + fn is_raw_match_candidate(&self, path: &CandidatePath<'_>) -> bool { + self.glob.is_match_candidate(&path.0) } } impl std::fmt::Display for RestrictedGlob { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let repr = self.0.glob().to_string(); - f.write_str(&repr) + let repr = self.glob.glob().to_string(); + let negation = if self.is_negated { "!" } else { "" }; + write!(f, "{negation}{repr}") } } impl From<RestrictedGlob> for String { @@ -38,6 +67,11 @@ impl From<RestrictedGlob> for String { impl std::str::FromStr for RestrictedGlob { type Err = RestrictedGlobError; fn from_str(value: &str) -> Result<Self, Self::Err> { + let (is_negated, value) = if let Some(stripped) = value.strip_prefix('!') { + (true, stripped) + } else { + (false, value) + }; validate_restricted_glob(value)?; let mut glob_builder = globset::GlobBuilder::new(value); // Allow escaping with `\` on all platforms. 
@@ -45,7 +79,10 @@ impl std::str::FromStr for RestrictedGlob { // Only `**` can match `/` glob_builder.literal_separator(true); match glob_builder.build() { - Ok(glob) => Ok(RestrictedGlob(glob.compile_matcher())), + Ok(glob) => Ok(RestrictedGlob { + is_negated, + glob: glob.compile_matcher(), + }), Err(error) => Err(RestrictedGlobError::Generic( error.kind().to_string().into_boxed_str(), )), @@ -98,12 +135,59 @@ impl schemars::JsonSchema for RestrictedGlob { /// Constructing candidates has a very small cost associated with it, so /// callers may find it beneficial to amortize that cost when matching a single /// path against multiple globs or sets of globs. +#[derive(Debug, Clone)] pub struct CandidatePath<'a>(globset::Candidate<'a>); impl<'a> CandidatePath<'a> { /// Create a new candidate for matching from the given path. pub fn new(path: &'a impl AsRef<std::path::Path>) -> Self { Self(globset::Candidate::new(path)) } + + /// Tests whether the current path matches `glob`. + pub fn matches(&self, glob: &RestrictedGlob) -> bool { + glob.is_match_candidate(self) + } + + /// Match against a list of globs where negated globs are handled as exceptions. + /// + /// Let's take an example: + /// + /// ``` + /// use biome_js_analyze::utils::restricted_glob::{CandidatePath, RestrictedGlob}; + /// + /// let globs: &[RestrictedGlob] = &[ + /// "*".parse().unwrap(), + /// "!a*".parse().unwrap(), + /// "a".parse().unwrap(), + /// ]; + /// + /// assert!(CandidatePath::new(&"b").matches_with_exceptions(globs)); + /// assert!(CandidatePath::new(&"a").matches_with_exceptions(globs)); + /// + /// assert!(!CandidatePath::new(&"abc").matches_with_exceptions(globs)); + /// ``` + /// + /// - `b` matches `*` and is not excluded by the exception `!a*`. + /// Thus, `b` matches the list of globs. + /// - `abc` matches the first glob `*`, however it is excluded by the exception `!a*`. + /// Thus `abc` doesn't match the list of globs. 
+ /// - `a` matches the first glob `*` and is excluded by the exception `!a*`. + /// However, it is included again by the last glob `a`. + /// Thus `a` matches the list of globs. + /// + pub fn matches_with_exceptions<'b, I>(&self, globs: I) -> bool + where + I: IntoIterator<Item = &'b RestrictedGlob>, + I::IntoIter: DoubleEndedIterator, + { + // Iterate in reverse order to avoid unnecessary glob matching. + for glob in globs.into_iter().rev() { + if glob.is_raw_match_candidate(self) { + return !glob.is_negated(); + } + } + false + } } #[derive(Debug)] @@ -144,7 +228,6 @@ pub enum RestrictedGlobErrorKind { UnsupportedAlternates, UnsupportedCharacterClass, UnsupportedAnyCharacter, - UnsupportedNegation, } impl std::fmt::Display for RestrictedGlobErrorKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -162,9 +245,6 @@ impl std::fmt::Display for RestrictedGlobErrorKind { Self::UnsupportedAnyCharacter => { r"`?` matcher is not supported. Use `\?` to escape the character." } - Self::UnsupportedNegation => { - r"Negated globs `!` are not supported. Use `\!` to escape the character." - } }; write!(f, "{desc}") } @@ -175,12 +255,6 @@ fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> { let mut it = pattern.bytes().enumerate(); while let Some((i, c)) = it.next() { match c { - b'!' 
if i == 0 => { - return Err(RestrictedGlobError::Regular { - kind: RestrictedGlobErrorKind::UnsupportedNegation, - index: i as u32, - }); - } b'\\' => { // Accept a restrictive set of escape sequence if let Some((j, c)) = it.next() { @@ -227,19 +301,21 @@ fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> { #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; #[test] fn test_validate_restricted_glob() { - assert!(validate_restricted_glob("!*.js").is_err()); assert!(validate_restricted_glob("*.[jt]s").is_err()); assert!(validate_restricted_glob("*.{js,ts}").is_err()); assert!(validate_restricted_glob("?*.js").is_err()); assert!(validate_restricted_glob(r"\").is_err()); assert!(validate_restricted_glob(r"\n").is_err()); assert!(validate_restricted_glob(r"\😀").is_err()); - assert!(validate_restricted_glob("!").is_err()); + assert!(validate_restricted_glob("!*.js").is_ok()); + assert!(validate_restricted_glob("!").is_ok()); assert!(validate_restricted_glob("*.js").is_ok()); assert!(validate_restricted_glob("**/*.js").is_ok()); assert!(validate_restricted_glob(r"\*").is_ok()); @@ -258,4 +334,35 @@ mod tests { .unwrap() .is_match("file/path.js")); } + + #[test] + fn test_match_with_exceptions() { + let a = CandidatePath::new(&"a"); + + assert!(a.matches_with_exceptions(&[ + RestrictedGlob::from_str("*").unwrap(), + RestrictedGlob::from_str("!b").unwrap(), + ])); + assert!(!a.matches_with_exceptions(&[ + RestrictedGlob::from_str("*").unwrap(), + RestrictedGlob::from_str("!a*").unwrap(), + ])); + assert!(a.matches_with_exceptions(&[ + RestrictedGlob::from_str("*").unwrap(), + RestrictedGlob::from_str("!a*").unwrap(), + RestrictedGlob::from_str("a").unwrap(), + ])); + } + + #[test] + fn test_to_string() { + assert_eq!( + RestrictedGlob::from_str("**/*.js").unwrap().to_string(), + "**/*.js" + ); + assert_eq!( + RestrictedGlob::from_str("!**/*.js").unwrap().to_string(), + "!**/*.js" + ); + } }