Skip to content

Commit

Permalink
Replace glob_match with globset.
Browse files Browse the repository at this point in the history
The glob_match crate has a bug with a pattern like **/*test*.py ,
where a file like abc/test/def/test.py does not match.

Or even t/test.py does not match.
  • Loading branch information
jacobotb committed Mar 6, 2024
1 parent 97d161b commit cfbb3a1
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 49 deletions.
2 changes: 1 addition & 1 deletion LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ bitflags,https://github.com/bitflags/bitflags,MIT,Copyright (c) 2014 The Rust Pr
cmake-rs,https://github.com/rust-lang/cmake-rs,MIT,Copyright (c) 2014 Alex Crichton
deno-core,https://github.com/denoland/deno,MIT,Copyright 2018-2023 the Deno authors
git2,https://crates.io/crates/git2,MIT,Copyright (c) 2014 Alex Crichton
glob-match,https://crates.io/crates/glob-match,MIT, Copyright (c) 2023 Devon Govett
globset,https://crates.io/crates/globset,MIT,Copyright (c) 2015 Andrew Gallant
indicatif,https://crates.io/crates/indicatif,MIT,Copyright (c) 2017 Armin Ronacher <armin.ronacher@active-4.com>
itertools,https://github.com/rust-itertools/itertools,MIT,Copyright 2015 itertools Developers
lazy_static,https://crates.io/crates/lazy_static,MIT,Copyright 2016 lazy-static.rs Developers
Expand Down
23 changes: 18 additions & 5 deletions crates/bins/src/bin/datadog-static-analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,20 @@ fn print_configuration(configuration: &CliConfiguration) {
let ignore_paths_str = if configuration.path_config.ignore.is_empty() {
"no ignore path".to_string()
} else {
configuration.path_config.ignore.join(",")
configuration
.path_config
.ignore
.iter()
.map(|p| p.clone().into())
.collect::<Vec<String>>()
.join(",")
};
let only_paths_str = match &configuration.path_config.only {
Some(x) => x.join(","),
Some(x) => x
.iter()
.map(|p| p.clone().into())
.collect::<Vec<String>>()
.join(","),
None => "all paths".to_string(),
};

Expand Down Expand Up @@ -293,14 +303,17 @@ fn main() -> Result<()> {
}

// add ignore path from the options
path_config.ignore.extend(ignore_paths_from_options);
path_config
.ignore
.extend(ignore_paths_from_options.iter().map(|p| p.clone().into()));

// ignore all directories that are in gitignore
if !ignore_gitignore {
let paths_from_gitignore = read_files_from_gitignore(directory_to_analyze.as_str());
let paths_from_gitignore = read_files_from_gitignore(directory_to_analyze.as_str())
.expect("error when reading gitignore file");
path_config
.ignore
.extend(paths_from_gitignore.expect("error when reading gitignore file"));
.extend(paths_from_gitignore.iter().map(|p| p.clone().into()));
}

let languages = get_languages_for_rules(&rules);
Expand Down
2 changes: 1 addition & 1 deletion crates/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ sha2 = { workspace = true }
uuid = { workspace = true }
# other
git2 = "0.18.2"
glob-match = "0.2.1"
globset = "0.4.14"
percent-encoding = "2.3.1"
prettytable-rs = "0.10.0"
reqwest = { version = "0.11", features = ["blocking", "json"] }
Expand Down
26 changes: 16 additions & 10 deletions crates/cli/src/config_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,14 @@ rulesets:
"go-best-practices".to_string(),
RulesetConfig {
paths: PathConfig {
only: Some(vec!["one/two".to_string(), "foo/**/*.go".to_string()]),
ignore: vec!["tres/cuatro".to_string(), "bar/**/*.go".to_string()],
only: Some(vec![
"one/two".to_string().into(),
"foo/**/*.go".to_string().into(),
]),
ignore: vec![
"tres/cuatro".to_string().into(),
"bar/**/*.go".to_string().into(),
],
},
rules: HashMap::new(),
},
Expand Down Expand Up @@ -144,7 +150,7 @@ rulesets:
"go-best-practices".to_string(),
RulesetConfig {
paths: PathConfig {
only: Some(vec!["foo".to_string()]),
only: Some(vec!["foo".to_string().into()]),
ignore: vec![],
},
..Default::default()
Expand All @@ -155,7 +161,7 @@ rulesets:
RulesetConfig {
paths: PathConfig {
only: None,
ignore: vec!["bar".to_string()],
ignore: vec!["bar".to_string().into()],
},
..Default::default()
},
Expand Down Expand Up @@ -213,8 +219,8 @@ rulesets:
"no-eval".to_string(),
RuleConfig {
paths: PathConfig {
only: Some(vec!["py/**".to_string()]),
ignore: vec!["py/insecure/**".to_string()],
only: Some(vec!["py/**".to_string().into()]),
ignore: vec!["py/insecure/**".to_string().into()],
},
},
)]),
Expand Down Expand Up @@ -294,11 +300,11 @@ max-file-size-kb: 512
let expected = ConfigFile {
rulesets: HashMap::from([("python-security".to_string(), RulesetConfig::default())]),
paths: PathConfig {
only: Some(vec!["py/**/foo/*.py".to_string()]),
only: Some(vec!["py/**/foo/*.py".to_string().into()]),
ignore: vec![
"py/testing/*.py".to_string(),
"**/test/**".to_string(),
"path1".to_string(),
"py/testing/*.py".to_string().into(),
"**/test/**".to_string().into(),
"path1".to_string().into(),
],
},
ignore_gitignore: Some(false),
Expand Down
25 changes: 12 additions & 13 deletions crates/cli/src/file_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@ use std::fs::read_to_string;
use std::path::{Path, PathBuf};

use anyhow::Result;
use glob_match::glob_match;
use sha2::{Digest, Sha256};
use walkdir::WalkDir;

use kernel::model::common::Language;
use kernel::model::violation::Violation;

use crate::model::cli_configuration::CliConfiguration;
use crate::model::config_file::PathConfig;
use crate::model::config_file::{PathConfig, PathPattern};
use crate::model::datadog_api::DiffAwareData;

static FILE_EXTENSIONS_PER_LANGUAGE_LIST: &[(Language, &[&str])] = &[
Expand Down Expand Up @@ -154,18 +153,19 @@ pub fn get_files(
Ok(files_to_return)
}

fn matches_pattern(pattern: &str, path: &str) -> bool {
if glob_match(pattern, path) {
return true;
}
return Path::new(path).starts_with(Path::new(pattern));
fn matches_pattern(pattern: &PathPattern, path: &str) -> bool {
pattern
.glob
.as_ref()
.map(|g| g.is_match(path))
.unwrap_or(false)
|| Path::new(path).starts_with(&pattern.prefix)
}

pub fn is_allowed_by_path_config(paths: &PathConfig, file_name: &str) -> bool {
if paths
.ignore
.iter()
.filter(|p| !p.is_empty())
.any(|pattern| matches_pattern(pattern, file_name))
{
return false;
Expand All @@ -174,7 +174,6 @@ pub fn is_allowed_by_path_config(paths: &PathConfig, file_name: &str) -> bool {
None => true,
Some(only) => only
.iter()
.filter(|p| !p.is_empty())
.any(|pattern| matches_pattern(pattern, file_name)),
}
}
Expand Down Expand Up @@ -594,7 +593,7 @@ mod tests {

// now, we add one glob pattern to ignore
let path_config = PathConfig {
ignore: vec!["src/**/main.rs".to_string()],
ignore: vec!["src/**/main.rs".to_string().into()],
only: None,
};
let files = get_files(&base_path, vec![], &path_config).unwrap();
Expand All @@ -612,7 +611,7 @@ mod tests {

// now, we add one path prefix to ignore
let path_config = PathConfig {
ignore: vec!["src/a".to_string()],
ignore: vec!["src/a".to_string().into()],
only: None,
};
let files = get_files(&base_path, vec![], &path_config).unwrap();
Expand All @@ -622,7 +621,7 @@ mod tests {
// now we add one glob pattern to require
let path_config = PathConfig {
ignore: vec![],
only: Some(vec!["**/other.rs".to_string()]),
only: Some(vec!["**/other.rs".to_string().into()]),
};
let files = get_files(&base_path, vec![], &path_config).unwrap();
assert_contains_files!(&base_path, files, ["src/a/other.rs", "test/a/other.rs"]);
Expand All @@ -631,7 +630,7 @@ mod tests {
// now we add one glob path prefix to require
let path_config = PathConfig {
ignore: vec![],
only: Some(vec!["src/a".to_string()]),
only: Some(vec!["src/a".to_string().into()]),
};
let files = get_files(&base_path, vec![], &path_config).unwrap();
assert_contains_files!(&base_path, files, ["src/a/main.rs", "src/a/other.rs"]);
Expand Down
14 changes: 10 additions & 4 deletions crates/cli/src/model/cli_configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,17 @@ impl CliConfiguration {
// println!("rules string: {}", rules_string.join("|"));
let full_config_string = format!(
"{}:{}:{}:{}::{}:{}",
self.path_config.ignore.join(","),
self.path_config
.only
.as_ref()
.map_or("".to_string(), |v| v.join(",")),
.ignore
.iter()
.map(|p| p.clone().into())
.collect::<Vec<String>>()
.join(","),
self.path_config.only.as_ref().map_or("".to_string(), |v| v
.iter()
.map(|p| p.clone().into())
.collect::<Vec<String>>()
.join(",")),
self.ignore_gitignore,
rules_string.join(","),
self.max_file_size_kb,
Expand Down
42 changes: 39 additions & 3 deletions crates/cli/src/model/config_file.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
use globset::{GlobBuilder, GlobMatcher};
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;

use crate::model::serialization::{deserialize_ruleconfigs, deserialize_rulesetconfigs};

use serde;
use serde::{Deserialize, Serialize};

#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(from = "String", into = "String")]
pub struct PathPattern {
pub glob: Option<GlobMatcher>,
pub prefix: PathBuf,
}

// Lists of directories and glob patterns to include/exclude from the analysis.
#[derive(Deserialize, Serialize, Debug, PartialEq, Default, Clone)]
pub struct PathConfig {
// Analyze only these directories and patterns.
pub only: Option<Vec<String>>,
pub only: Option<Vec<PathPattern>>,
// Do not analyze any of these directories and patterns.
#[serde(default)]
pub ignore: Vec<String>,
pub ignore: Vec<PathPattern>,
}

// Configuration for a single rule.
Expand Down Expand Up @@ -79,7 +88,7 @@ impl From<RawConfigFile> for ConfigFile {
paths: {
let mut paths = value.paths;
if let Some(ignore) = value.ignore_paths {
paths.ignore.extend(ignore);
paths.ignore.extend(ignore.iter().map(|p| p.clone().into()));
}
paths
},
Expand All @@ -94,3 +103,30 @@ impl fmt::Display for ConfigFile {
write!(f, "{:?}", self)
}
}

impl From<String> for PathPattern {
fn from(value: String) -> Self {
PathPattern {
glob: GlobBuilder::new(&value)
.literal_separator(true)
.empty_alternates(true)
.backslash_escape(true)
.build()
.map(|g| g.compile_matcher())
.ok(),
prefix: PathBuf::from(value),
}
}
}

impl From<PathPattern> for String {
fn from(value: PathPattern) -> Self {
value.prefix.display().to_string()
}
}

impl PartialEq for PathPattern {
fn eq(&self, other: &Self) -> bool {
self.prefix.eq(&other.prefix)
}
}
24 changes: 12 additions & 12 deletions crates/cli/src/path_restrictions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ mod tests {
"ignores-test".to_string(),
RulesetConfig {
paths: PathConfig {
ignore: vec!["test/**".to_string()],
ignore: vec!["test/**".to_string().into()],
only: None,
},
rules: HashMap::new(),
Expand All @@ -93,7 +93,7 @@ mod tests {
RulesetConfig {
paths: PathConfig {
ignore: vec![],
only: Some(vec!["*/code/**".to_string()]),
only: Some(vec!["*/code/**".to_string().into()]),
},
rules: HashMap::new(),
},
Expand All @@ -102,8 +102,8 @@ mod tests {
"test-but-not-code".to_string(),
RulesetConfig {
paths: PathConfig {
ignore: vec!["*/code/**".to_string()],
only: Some(vec!["test/**".to_string()]),
ignore: vec!["*/code/**".to_string().into()],
only: Some(vec!["test/**".to_string().into()]),
},
rules: HashMap::new(),
},
Expand Down Expand Up @@ -136,7 +136,7 @@ mod tests {
"ignores-test".to_string(),
RuleConfig {
paths: PathConfig {
ignore: vec!["test/**".to_string()],
ignore: vec!["test/**".to_string().into()],
only: None,
},
},
Expand All @@ -146,16 +146,16 @@ mod tests {
RuleConfig {
paths: PathConfig {
ignore: vec![],
only: Some(vec!["*/code/**".to_string()]),
only: Some(vec!["*/code/**".to_string().into()]),
},
},
),
(
"test-but-not-code".to_string(),
RuleConfig {
paths: PathConfig {
ignore: vec!["*/code/**".to_string()],
only: Some(vec!["test/**".to_string()]),
ignore: vec!["*/code/**".to_string().into()],
only: Some(vec!["test/**".to_string().into()]),
},
},
),
Expand Down Expand Up @@ -186,14 +186,14 @@ mod tests {
"only-test".to_string(),
RulesetConfig {
paths: PathConfig {
only: Some(vec!["test/**".to_string()]),
only: Some(vec!["test/**".to_string().into()]),
ignore: vec![],
},
rules: HashMap::from([(
"ignores-code".to_string(),
RuleConfig {
paths: PathConfig {
ignore: vec!["*/code/**".to_string()],
ignore: vec!["*/code/**".to_string().into()],
only: None,
},
},
Expand Down Expand Up @@ -223,7 +223,7 @@ mod tests {
"only-test-starstar-foo-glob".to_string(),
RulesetConfig {
paths: PathConfig {
only: Some(vec!["test/**/foo.go".to_string()]),
only: Some(vec!["test/**/foo.go".to_string().into()]),
ignore: vec![],
},
..Default::default()
Expand All @@ -234,7 +234,7 @@ mod tests {
RulesetConfig {
paths: PathConfig {
only: None,
ignore: vec!["uno/code".to_string()],
ignore: vec!["uno/code".to_string().into()],
},
..Default::default()
},
Expand Down

0 comments on commit cfbb3a1

Please sign in to comment.