Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POSIX compliant globs #151

Merged
merged 5 commits into from
Mar 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ authors = ["uutils developers"]
[dependencies]
chrono = "0.4"
clap = "2.34"
glob = "0.3"
walkdir = "2.3"
regex = "1.5"
once_cell = "1.9"
Expand Down
3 changes: 0 additions & 3 deletions src/find/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

extern crate findutils;
extern crate glob;

fn main() {
let args = std::env::args().collect::<Vec<String>>();
let strs: Vec<&str> = args.iter().map(|s| s.as_ref()).collect();
Expand Down
238 changes: 238 additions & 0 deletions src/find/matchers/glob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// Copyright 2022 Tavian Barnes
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

use onig::{self, Regex, RegexOptions, Syntax};

/// Compiles `expr` with the `onig` crate's POSIX basic regular expression syntax.
///
/// The syntax's own option flags are combined with the caller-supplied `options`
/// before the regex is built. Any compilation error is returned to the caller.
fn parse_bre(expr: &str, options: RegexOptions) -> Result<Regex, onig::Error> {
    let syntax = Syntax::posix_basic();
    let combined = syntax.options() | options;
    Regex::with_options(expr, combined, syntax)
}

/// Appends `ch` to `regex` so that it is matched literally.
///
/// Characters that are special in a basic regular expression are prefixed with a
/// backslash; every other character is appended unchanged.
fn regex_push_literal(regex: &mut String, ch: char) {
    // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_03
    const NEEDS_ESCAPE: [char; 6] = ['.', '[', '\\', '*', '^', '$'];

    if NEEDS_ESCAPE.contains(&ch) {
        regex.push('\\');
    }
    regex.push(ch);
}

/// Extracts a bracket expression from a glob.
///
/// `pattern` is the text immediately after an opening `[`. On success, returns the
/// equivalent regex bracket expression (including the leading `[`) together with the
/// part of `pattern` that was not consumed. Returns `None` when no valid bracket
/// expression could be extracted, in which case the caller treats the `[` literally.
fn extract_bracket_expr(pattern: &str) -> Option<(String, &str)> {
    // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13_01
    //
    //     If an open bracket introduces a bracket expression as in XBD RE Bracket Expression,
    //     except that the <exclamation-mark> character ( '!' ) shall replace the <circumflex>
    //     character ( '^' ) in its role in a non-matching list in the regular expression
    //     notation, it shall introduce a pattern bracket expression. A bracket expression
    //     starting with an unquoted <circumflex> character produces unspecified results.
    //     Otherwise, '[' shall match the character itself.
    //
    // To check for valid bracket expressions, we scan for the closing bracket and
    // attempt to parse that segment as a regex. If that fails, we treat the '['
    // literally.

    let mut expr = "[".to_string();

    let mut chars = pattern.chars();
    let mut next = chars.next();

    // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
    //
    //     3. A non-matching list expression begins with a <circumflex> ( '^' ) ...
    //
    // (but in a glob, '!' is used instead of '^')
    if next == Some('!') {
        expr.push('^');
        next = chars.next();
    }

    // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
    //
    //     1. ... The <right-square-bracket> ( ']' ) shall lose its special meaning and
    //        represent itself in a bracket expression if it occurs first in the list (after
    //        an initial <circumflex> ( '^' ), if any).
    if next == Some(']') {
        expr.push(']');
        next = chars.next();
    }

    while let Some(ch) = next {
        expr.push(ch);

        match ch {
            '[' => {
                // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
                //
                //     4. A collating symbol is a collating element enclosed within
                //        bracket-period ( "[." and ".]" ) delimiters. ...
                //
                //     5. An equivalence class expression shall ... be expressed by enclosing
                //        any one of the collating elements in the equivalence class within
                //        bracket-equal ( "[=" and "=]" ) delimiters.
                //
                //     6. ... A character class expression is expressed as a character class
                //        name enclosed within bracket- <colon> ( "[:" and ":]" ) delimiters.
                next = chars.next();
                if let Some(delim) = next {
                    expr.push(delim);

                    if matches!(delim, '.' | '=' | ':') {
                        let rest = chars.as_str();
                        // NOTE(review): a `[char; 2]` pattern matches the first occurrence
                        // of *either* character, not the two-character closing sequence --
                        // confirm this is the intended scan.
                        let end = rest.find([delim, ']'])? + 2;
                        // `end` may point past the end of `rest` (e.g. the glob "[[:a:") or
                        // into the middle of a multi-byte character, so slice with a checked
                        // `get` and give up on the bracket expression instead of panicking.
                        let inner = rest.get(..end)?;
                        expr.push_str(inner);
                        // Safe: `get(..end)` just proved `end` is an in-range char boundary.
                        chars = rest[end..].chars();
                    }
                }
            }
            ']' => {
                // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
                //
                //     1. ... The <right-square-bracket> ( ']' ) shall ... terminate the
                //        bracket expression, unless it appears in a collating symbol (such as
                //        "[.].]" ) or is the ending <right-square-bracket> for a collating
                //        symbol, equivalence class, or character class.
                break;
            }
            _ => {}
        }

        next = chars.next();
    }

    // An unterminated or otherwise malformed candidate is rejected by the regex parser.
    if parse_bre(&expr, RegexOptions::REGEX_OPTION_NONE).is_ok() {
        Some((expr, chars.as_str()))
    } else {
        None
    }
}

/// Converts a POSIX glob into a POSIX Basic Regular Expression
fn glob_to_regex(pattern: &str) -> String {
let mut regex = String::new();

let mut chars = pattern.chars();
while let Some(ch) = chars.next() {
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
match ch {
'?' => regex.push('.'),
'*' => regex.push_str(".*"),
'\\' => {
if let Some(ch) = chars.next() {
regex_push_literal(&mut regex, ch);
} else {
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
//
// If pattern ends with an unescaped <backslash>, fnmatch() shall return a
// non-zero value (indicating either no match or an error).
//
// Most implementations return FNM_NOMATCH in this case, so return a regex that
// never matches.
return "$.".to_string();
}
}
'[' => {
if let Some((expr, rest)) = extract_bracket_expr(chars.as_str()) {
regex.push_str(&expr);
chars = rest.chars();
} else {
regex_push_literal(&mut regex, ch);
}
}
_ => regex_push_literal(&mut regex, ch),
}
}

regex
}

/// An fnmatch()-style glob matcher.
///
/// Built by `Pattern::new`, which translates the glob into a basic regular expression
/// and compiles it.
pub struct Pattern {
// Compiled regex produced from the translated glob.
regex: Regex,
}

impl Pattern {
    /// Parse an fnmatch()-style glob.
    ///
    /// When `caseless` is true the regex is compiled with
    /// `RegexOptions::REGEX_OPTION_IGNORECASE`; otherwise no extra options are set.
    ///
    /// # Panics
    ///
    /// Panics if the regex generated by `glob_to_regex()` fails to compile. That would
    /// indicate a bug in the glob translation rather than bad user input.
    pub fn new(pattern: &str, caseless: bool) -> Self {
        let options = if caseless {
            RegexOptions::REGEX_OPTION_IGNORECASE
        } else {
            RegexOptions::REGEX_OPTION_NONE
        };

        // As long as glob_to_regex() is correct, this should never fail
        let regex = parse_bre(&glob_to_regex(pattern), options)
            .expect("glob_to_regex() should always produce a valid basic regular expression");
        Self { regex }
    }

    /// Test if this pattern matches a string.
    ///
    /// NOTE(review): this delegates to `Regex::is_match`; whether the whole string must
    /// match (as fnmatch() requires) depends on that method's semantics -- confirm.
    pub fn matches(&self, string: &str) -> bool {
        self.regex.is_match(string)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `glob` is translated to exactly the regex text `expected`.
    #[track_caller]
    fn check_translation(glob: &str, expected: &str) {
        assert_eq!(glob_to_regex(glob), expected);
    }

    /// Compiles `glob` and reports whether it matches `text`.
    fn glob_matches(glob: &str, caseless: bool, text: &str) -> bool {
        Pattern::new(glob, caseless).matches(text)
    }

    #[test]
    fn literals() {
        check_translation(r"foo.bar", r"foo\.bar");
    }

    #[test]
    fn regex_special() {
        check_translation(r"^foo.bar$", r"\^foo\.bar\$");
    }

    #[test]
    fn wildcards() {
        check_translation(r"foo?bar*baz", r"foo.bar.*baz");
    }

    #[test]
    fn escapes() {
        check_translation(r"fo\o\?bar\*baz\\", r"foo?bar\*baz\\");
    }

    #[test]
    fn incomplete_escape() {
        // A trailing lone backslash yields the "never matches" regex.
        check_translation(r"foo\", r"$.");
    }

    #[test]
    fn valid_brackets() {
        check_translation(r"foo[bar][!baz]", r"foo[bar][^baz]");
    }

    #[test]
    fn complex_brackets() {
        check_translation(
            r"[!]!.*[\[.].][=]=][:space:]-]",
            r"[^]!.*[\[.].][=]=][:space:]-]",
        );
    }

    #[test]
    fn invalid_brackets() {
        // Unterminated brackets are emitted as escaped literals.
        check_translation(r"foo[bar[!baz", r"foo\[bar\[!baz");
    }

    #[test]
    fn pattern_matches() {
        assert!(glob_matches(r"foo*bar", false, "foo--bar"));

        assert!(!glob_matches(r"foo*bar", false, "bar--foo"));
    }

    #[test]
    fn caseless_matches() {
        assert!(glob_matches(r"foo*BAR", true, "FOO--bar"));

        assert!(!glob_matches(r"foo*BAR", true, "BAR--foo"));
    }
}
107 changes: 107 additions & 0 deletions src/find/matchers/lname.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2017 Google Inc.
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

use std::io::{stderr, Write};
use std::path::PathBuf;

use walkdir::DirEntry;

use super::glob::Pattern;
use super::{Matcher, MatcherIO};

/// Reads the target of the symlink at `file_info`'s path.
///
/// Returns `None` when the link cannot be read. A message is written to stderr for
/// every failure except `ErrorKind::InvalidInput`, which is taken to mean that the
/// entry is simply not a symlink.
fn read_link_target(file_info: &DirEntry) -> Option<PathBuf> {
    let path = file_info.path();

    let err = match path.read_link() {
        Ok(target) => return Some(target),
        Err(err) => err,
    };

    // If it's not a symlink, then it's not an error that should be shown.
    // NOTE(review): this assumes non-symlinks are reported as InvalidInput on every
    // supported platform -- confirm.
    let worth_reporting = err.kind() != std::io::ErrorKind::InvalidInput;
    if worth_reporting {
        writeln!(
            &mut stderr(),
            "Error reading target of {}: {}",
            path.display(),
            err
        )
        .unwrap();
    }

    None
}

/// This matcher makes a comparison of the link target against a shell wildcard
/// pattern. See `glob::Pattern` for details on the exact syntax.
pub struct LinkNameMatcher {
// Compiled glob that a symlink's target is tested against.
pattern: Pattern,
}

impl LinkNameMatcher {
pub fn new(pattern_string: &str, caseless: bool) -> LinkNameMatcher {
let pattern = Pattern::new(pattern_string, caseless);
Self { pattern }
}
}

impl Matcher for LinkNameMatcher {
    /// Returns true when the link target of `file_info` could be read and matches the
    /// pattern. The target path is converted to text lossily before matching.
    fn matches(&self, file_info: &DirEntry, _: &mut MatcherIO) -> bool {
        read_link_target(file_info)
            .map_or(false, |target| self.pattern.matches(&target.to_string_lossy()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::find::matchers::tests::get_dir_entry_for;
    use crate::find::matchers::Matcher;
    use crate::find::tests::FakeDependencies;

    use std::io::ErrorKind;

    #[cfg(unix)]
    use std::os::unix::fs::symlink;
    #[cfg(windows)]
    use std::os::windows::fs::symlink_file;

    /// Ensures the `link-f` symlink fixture (pointing at "abbbc") exists.
    ///
    /// An already-existing link is fine; any other failure aborts the test.
    fn create_file_link() {
        #[cfg(unix)]
        match symlink("abbbc", "test_data/links/link-f") {
            Ok(()) => {}
            Err(e) if e.kind() == ErrorKind::AlreadyExists => {}
            Err(e) => panic!("Failed to create sym link: {:?}", e),
        }
        #[cfg(windows)]
        match symlink_file("abbbc", "test_data/links/link-f") {
            Ok(()) => {}
            Err(e) if e.kind() == ErrorKind::AlreadyExists => {}
            Err(e) => panic!("Failed to create sym link: {:?}", e),
        }
    }

    /// Runs a `LinkNameMatcher` built from `glob` against the `link-f` fixture.
    fn link_f_matches(glob: &str, caseless: bool) -> bool {
        create_file_link();

        let entry = get_dir_entry_for("test_data/links", "link-f");
        let matcher = LinkNameMatcher::new(glob, caseless);
        let deps = FakeDependencies::new();
        let matched = matcher.matches(&entry, &mut deps.new_matcher_io());
        matched
    }

    #[test]
    fn matches_against_link_target() {
        assert!(link_f_matches("ab?bc", false));
    }

    #[test]
    fn caseless_matches_against_link_target() {
        assert!(link_f_matches("AbB?c", true));
    }
}
Loading