Skip to content

Commit

Permalink
feat:: add restricted glob
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos committed Oct 22, 2024
1 parent f8946c0 commit ae3e20e
Show file tree
Hide file tree
Showing 8 changed files with 508 additions and 107 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ crossbeam = "0.8.4"
dashmap = "6.1.0"
enumflags2 = "0.7.10"
getrandom = "0.2.15"
globset = "0.4.15"
ignore = "0.4.23"
indexmap = { version = "2.6.0", features = ["serde"] }
insta = "1.40.0"
Expand Down
1 change: 1 addition & 0 deletions crates/biome_js_analyze/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ biome_suppression = { workspace = true }
biome_unicode_table = { workspace = true }
bitvec = "1.0.1"
enumflags2 = { workspace = true }
globset = { workspace = true }
natord = { workspace = true }
regex = { workspace = true }
roaring = "0.10.6"
Expand Down
52 changes: 49 additions & 3 deletions crates/biome_js_analyze/src/assists/source/organize_imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ use biome_analyze::{
context::RuleContext, declare_source_rule, ActionCategory, Ast, FixKind, Rule, SourceActionKind,
};
use biome_console::markup;
use biome_deserialize::Deserializable;
use biome_deserialize_macros::Deserializable;
use biome_js_syntax::JsModule;
use biome_rowan::BatchMutationExt;

use crate::JsRuleAction;
use crate::{utils::restricted_glob::RestrictedGlob, JsRuleAction};

pub mod util;
pub mod legacy;
pub mod util;

declare_source_rule! {
/// Provides a whole-source code action to sort the imports in the file
Expand Down Expand Up @@ -47,7 +49,7 @@ impl Rule for OrganizeImports {
type Query = Ast<JsModule>;
type State = State;
type Signals = Option<Self::State>;
type Options = ();
type Options = Options;

fn run(ctx: &RuleContext<Self>) -> Option<Self::State> {
let root = ctx.query();
Expand Down Expand Up @@ -76,3 +78,47 @@ pub enum State {
Legacy(legacy::ImportGroups),
Modern,
}

// Options of the `OrganizeImports` source action (see `type Options = Options`).
//
// With serde, keys use camelCase (`importGroups`), unknown keys are rejected,
// and missing keys fall back to the `Default` value of the struct.
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up as descriptions by the derived JSON schema.
#[derive(Clone, Debug, Default, serde::Deserialize, Deserializable, serde::Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "camelCase", deny_unknown_fields, default)]
pub struct Options {
// NOTE(review): presumably selects the implementation of the `legacy` module;
// the code reading this flag is not visible here — confirm.
legacy: bool,
// Import groups; each entry is either a predefined group or a custom
// restricted glob (see `ImportGroup`).
import_groups: Box<[ImportGroup]>,
}

// A single entry of `Options::import_groups`.
//
// `#[serde(untagged)]`: with serde, the inner value is (de)serialized directly,
// without any variant tag; variants are tried in declaration order.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(untagged)]
pub enum ImportGroup {
// One of the predefined groups (see `PredefinedImportGroup`).
Predefined(PredefinedImportGroup),
// A user-provided restricted glob.
Custom(RestrictedGlob),
}
impl Deserializable for ImportGroup {
fn deserialize(
value: &impl biome_deserialize::DeserializableValue,
name: &str,
diagnostics: &mut Vec<biome_deserialize::DeserializationDiagnostic>,
) -> Option<Self> {
Some(
if let Some(predefined) = Deserializable::deserialize(value, name, diagnostics) {
ImportGroup::Predefined(predefined)
} else {
ImportGroup::Custom(Deserializable::deserialize(value, name, diagnostics)?)
},
)
}
}

// Predefined import groups.
//
// With serde, each variant is (de)serialized as the colon-delimited name given
// by its `rename` attribute (e.g. `":node:"`).
// NOTE(review): the matching rules behind each group are not visible in this
// file — confirm against the code that consumes `Options::import_groups`.
#[derive(Clone, Debug, serde::Deserialize, Deserializable, Eq, PartialEq, serde::Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum PredefinedImportGroup {
// Serialized as `:blank-line:`.
#[serde(rename = ":blank-line:")]
BlankLine,
// Serialized as `:bun:`.
#[serde(rename = ":bun:")]
Bun,
// Serialized as `:node:`.
#[serde(rename = ":node:")]
Node,
// Serialized as `:types:`.
#[serde(rename = ":types:")]
Types,
}
1 change: 1 addition & 0 deletions crates/biome_js_analyze/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::iter;

pub mod batch;
pub mod rename;
pub mod restricted_glob;
pub mod restricted_regex;
#[cfg(test)]
pub mod tests;
Expand Down
261 changes: 261 additions & 0 deletions crates/biome_js_analyze/src/utils/restricted_glob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
use biome_rowan::{TextRange, TextSize};

/// A glob pattern restricted to the following syntax:
///
/// - star `*` that matches zero or more characters inside a path segment
/// - globstar `**` that matches zero or more path segments
/// - Use `\*` to escape `*`
/// - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`.
///   These characters are reserved for future use.
/// - `!` must be escaped if it is the first character of the pattern
///
/// A path segment is delimited by path separator `/` or the start/end of the path.
///
/// With serde, a restricted glob is converted from and into a `String`
/// (`try_from = "String"`, `into = "String"`), so an invalid pattern is
/// rejected while deserializing.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(try_from = "String", into = "String")]
pub struct RestrictedGlob(globset::GlobMatcher);
impl RestrictedGlob {
/// Tests whether the given path matches this pattern or not.
pub fn is_match(&self, path: impl AsRef<std::path::Path>) -> bool {
self.0.is_match(path)
}

/// Tests whether the given candidate path matches this pattern or not.
///
/// Same as [`Self::is_match`], but takes an already constructed
/// [`CandidatePath`] so that one candidate can be tested against several globs.
pub fn is_match_candidate(&self, path: &CandidatePath<'_>) -> bool {
self.0.is_match_candidate(&path.0)
}
}
impl std::fmt::Display for RestrictedGlob {
    /// Writes the textual form of the wrapped glob, as rendered by `globset`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `write!` formats the glob with default options, exactly like
        // `to_string()` would, but without the intermediate `String`.
        write!(f, "{}", self.0.glob())
    }
}
// Conversion used by `#[serde(into = "String")]` on `RestrictedGlob`.
impl From<RestrictedGlob> for String {
/// Converts the glob into its textual form, using its `Display` implementation.
fn from(value: RestrictedGlob) -> Self {
value.to_string()
}
}
impl std::str::FromStr for RestrictedGlob {
type Err = RestrictedGlobError;
fn from_str(value: &str) -> Result<Self, Self::Err> {
validate_restricted_glob(value)?;
let mut glob_builder = globset::GlobBuilder::new(value);
// Allow escaping with `\` on all platforms.
glob_builder.backslash_escape(true);
// Only `**` can match `/`
glob_builder.literal_separator(true);
match glob_builder.build() {
Ok(glob) => Ok(RestrictedGlob(glob.compile_matcher())),
Err(error) => Err(RestrictedGlobError::Generic(
error.kind().to_string().into_boxed_str(),
)),
}
}
}
// Conversion used by `#[serde(try_from = "String")]` on `RestrictedGlob`.
impl TryFrom<String> for RestrictedGlob {
type Error = RestrictedGlobError;
/// Parses the string as a restricted glob; delegates to the `FromStr` implementation.
fn try_from(value: String) -> Result<Self, Self::Error> {
value.parse()
}
}
// A hand-written impl lets us point the diagnostic at the exact location of the error.
impl biome_deserialize::Deserializable for RestrictedGlob {
    /// Deserializes a string and parses it as a restricted glob.
    ///
    /// On a parsing error, a diagnostic is pushed to `diagnostics` and `None`
    /// is returned. When the error carries an index, the diagnostic covers a
    /// single position inside the value; otherwise it covers the whole value.
    fn deserialize(
        value: &impl biome_deserialize::DeserializableValue,
        name: &str,
        diagnostics: &mut Vec<biome_deserialize::DeserializationDiagnostic>,
    ) -> Option<Self> {
        let pattern = String::deserialize(value, name, diagnostics)?;
        let error = match pattern.parse::<Self>() {
            Ok(glob) => return Some(glob),
            Err(error) => error,
        };
        let value_range = value.range();
        let error_range = match error.index() {
            // NOTE(review): the `1 +` presumably skips the opening quote of the
            // string literal — confirm.
            Some(index) => TextRange::at(
                value_range.start() + TextSize::from(1 + index),
                TextSize::from(1u32),
            ),
            None => value_range,
        };
        let diagnostic =
            biome_deserialize::DeserializationDiagnostic::new(format_args!("{error}"))
                .with_range(error_range);
        diagnostics.push(diagnostic);
        None
    }
}
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for RestrictedGlob {
    /// Name under which this type is registered in the generated schema.
    ///
    /// It must be specific to globs: the previous `"Regex"` name was a leftover
    /// from the regex wrapper and registered globs under a misleading name.
    fn schema_name() -> String {
        "Glob".to_string()
    }

    /// A restricted glob is represented as a plain string in the schema.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        String::json_schema(gen)
    }
}

/// A candidate path for matching.
///
/// Constructing candidates has a very small cost associated with it, so
/// callers may find it beneficial to amortize that cost when matching a single
/// path against multiple globs or sets of globs.
///
/// This is a thin wrapper around `globset::Candidate`; pass it to
/// [`RestrictedGlob::is_match_candidate`].
pub struct CandidatePath<'a>(globset::Candidate<'a>);
impl<'a> CandidatePath<'a> {
/// Create a new candidate for matching from the given path.
///
/// The candidate is tied to the lifetime `'a` of the `path` reference.
pub fn new(path: &'a impl AsRef<std::path::Path>) -> Self {
Self(globset::Candidate::new(path))
}
}

/// Error reported when a pattern cannot be turned into a restricted glob.
#[derive(Debug)]
pub enum RestrictedGlobError {
    /// The pattern doesn't follow the restricted glob syntax.
    Regular {
        /// What is wrong with the pattern.
        kind: RestrictedGlobErrorKind,
        /// Byte index in the pattern where the error is located.
        index: u32,
    },
    /// Error caused by a third-party module.
    Generic(Box<str>),
}
impl RestrictedGlobError {
    /// Returns the index in the glob where the error is located.
    ///
    /// Only [`RestrictedGlobError::Regular`] errors carry an index.
    pub fn index(&self) -> Option<u32> {
        if let Self::Regular { index, .. } = self {
            Some(*index)
        } else {
            None
        }
    }
}
impl std::error::Error for RestrictedGlobError {}
impl std::fmt::Display for RestrictedGlobError {
    /// Writes the description of the error; the index is not part of the message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Generic(desc) => f.write_str(desc),
            Self::Regular { kind, .. } => std::fmt::Display::fmt(kind, f),
        }
    }
}

/// The reason why a pattern is rejected by the restricted glob syntax.
#[derive(Debug, Eq, PartialEq)]
pub enum RestrictedGlobErrorKind {
    /// Occurs when an unescaped `\` is found at the end of a glob.
    DanglingEscape,
    /// Occurs when `\` precedes a character that cannot be escaped.
    /// Carries the character that follows the `\`.
    InvalidEscape(char),
    /// Occurs when an unescaped `{` or `}` is found.
    UnsupportedAlternates,
    /// Occurs when an unescaped `[` or `]` is found.
    UnsupportedCharacterClass,
    /// Occurs when an unescaped `?` is found.
    UnsupportedAnyCharacter,
    /// Occurs when the glob starts with an unescaped `!`.
    UnsupportedNegation,
}
impl std::fmt::Display for RestrictedGlobErrorKind {
    /// Writes a human-readable description of the error kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The only message that embeds data.
            Self::InvalidEscape(c) => {
                write!(f, "The escape sequence `\\{c}` is not supported.")
            }
            Self::DanglingEscape => f.write_str("Unterminated escape sequence."),
            Self::UnsupportedAlternates => f.write_str(
                r"Alternates `{}` are not supported. Use `\{` and `\}` to escape the characters.",
            ),
            Self::UnsupportedCharacterClass => f.write_str(
                r"Character class `[]` are not supported. Use `\[` and `\]` to escape the characters.",
            ),
            Self::UnsupportedAnyCharacter => {
                f.write_str(r"`?` matcher is not supported. Use `\?` to escape the character.")
            }
            Self::UnsupportedNegation => f.write_str(
                r"Negated globs `!` are not supported. Use `\!` to escape the character.",
            ),
        }
    }
}

/// Checks that `pattern` only uses the restricted glob syntax.
///
/// The pattern is scanned byte by byte and the first violation is reported:
/// a leading `!`, an unescaped `?`, `[`, `]`, `{` or `}`, a trailing `\`, or a
/// `\` followed by a character other than `!`, `*`, `?`, `{`, `}`, `[`, `]`
/// and `\`.
///
/// The reported index is the byte index of the offending character; for
/// escape-related errors it is the index of the `\`.
fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> {
    let mut bytes = pattern.bytes().enumerate();
    // `while let` rather than `for`: an escape consumes the following byte too.
    while let Some((position, byte)) = bytes.next() {
        let kind = match byte {
            b'!' if position == 0 => RestrictedGlobErrorKind::UnsupportedNegation,
            b'?' => RestrictedGlobErrorKind::UnsupportedAnyCharacter,
            b'[' | b']' => RestrictedGlobErrorKind::UnsupportedCharacterClass,
            b'{' | b'}' => RestrictedGlobErrorKind::UnsupportedAlternates,
            b'\\' => match bytes.next() {
                None => RestrictedGlobErrorKind::DanglingEscape,
                // Accept a restrictive set of escape sequences.
                Some((_, b'!' | b'*' | b'?' | b'{' | b'}' | b'[' | b']' | b'\\')) => continue,
                Some((escaped_position, _)) => RestrictedGlobErrorKind::InvalidEscape(
                    // `escaped_position` is a character boundary because it
                    // directly follows the one-byte character `\`, so slicing
                    // here cannot panic and a character is always found.
                    pattern[escaped_position..]
                        .chars()
                        .next()
                        .expect("valid character"),
                ),
            },
            _ => continue,
        };
        return Err(RestrictedGlobError::Regular {
            kind,
            index: position as u32,
        });
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Patterns using an unsupported syntax are rejected; supported ones are accepted.
    #[test]
    fn test_validate_restricted_glob() {
        assert!(validate_restricted_glob("!*.js").is_err());
        assert!(validate_restricted_glob("*.[jt]s").is_err());
        assert!(validate_restricted_glob("*.{js,ts}").is_err());
        assert!(validate_restricted_glob("?*.js").is_err());
        assert!(validate_restricted_glob(r"\").is_err());
        assert!(validate_restricted_glob(r"\n").is_err());
        assert!(validate_restricted_glob(r"\😀").is_err());
        assert!(validate_restricted_glob("!").is_err());

        assert!(validate_restricted_glob("*.js").is_ok());
        assert!(validate_restricted_glob("**/*.js").is_ok());
        assert!(validate_restricted_glob(r"\*").is_ok());
        assert!(validate_restricted_glob(r"\!").is_ok());
    }

    /// `*` stays inside a path segment, whereas `**` crosses path separators.
    #[test]
    fn test_restricted_glob() {
        assert!("*.js"
            .parse::<RestrictedGlob>()
            .unwrap()
            .is_match("path.js"));

        assert!(!"*.js"
            .parse::<RestrictedGlob>()
            .unwrap()
            .is_match("file/path.js"));

        assert!("**/*.js"
            .parse::<RestrictedGlob>()
            .unwrap()
            .is_match("file/path.js"));
    }

    /// Parsing goes through the validation of the restricted syntax.
    #[test]
    fn test_parse_rejects_unsupported_syntax() {
        assert!("*.{js,ts}".parse::<RestrictedGlob>().is_err());
        assert!("!*.js".parse::<RestrictedGlob>().is_err());
    }
}
Loading

0 comments on commit ae3e20e

Please sign in to comment.