Skip to content

Commit

Permalink
Cache information about file type
Browse files Browse the repository at this point in the history
This commit adds a cache that remembers whether a given path is a file or a directory, based on the results of `std::fs::read_dir`. This reduces the number of executed syscalls and improves the performance of the library.
  • Loading branch information
Kobzol committed Jan 2, 2024
1 parent 7b29204 commit ccac2b3
Showing 1 changed file with 61 additions and 19 deletions.
80 changes: 61 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ use std::cmp;
use std::error::Error;
use std::fmt;
use std::fs;
use std::fs::DirEntry;
use std::io;
use std::ops::Deref;
use std::path::{self, Component, Path, PathBuf};
use std::str::FromStr;

Expand All @@ -96,8 +98,8 @@ pub struct Paths {
dir_patterns: Vec<Pattern>,
require_dir: bool,
options: MatchOptions,
todo: Vec<Result<(PathBuf, usize), GlobError>>,
scope: Option<PathBuf>,
todo: Vec<Result<(PathWrapper, usize), GlobError>>,
scope: Option<PathWrapper>,
}

/// Return an iterator that produces all the `Path`s that match the given
Expand Down Expand Up @@ -242,6 +244,7 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternE
}

let scope = root.map_or_else(|| PathBuf::from("."), to_scope);
let scope = PathWrapper::from_path(scope);

let mut dir_patterns = Vec::new();
let components =
Expand Down Expand Up @@ -323,8 +326,44 @@ impl fmt::Display for GlobError {
}
}

fn is_dir(p: &Path) -> bool {
fs::metadata(p).map(|m| m.is_dir()).unwrap_or(false)
/// A path bundled with a cached answer to "is this a directory?".
///
/// The flag is computed once, when the wrapper is constructed, so later
/// checks read the field instead of querying the filesystem again.
#[derive(Debug)]
struct PathWrapper {
    /// The wrapped path.
    path: PathBuf,
    /// Whether `path` resolved to a directory when the wrapper was built.
    is_directory: bool,
}

impl PathWrapper {
    /// Wraps `path`, deriving the directory flag from the directory entry
    /// `e` that produced it.
    ///
    /// `DirEntry::file_type` does not follow symbolic links, so its answer
    /// is only used for entries that are not symlinks. For symlinks, and
    /// when the file type cannot be read, the flag comes from
    /// `fs::metadata`, which resolves links; if that fails too, the path is
    /// treated as "not a directory".
    fn from_dir_entry(path: PathBuf, e: DirEntry) -> Self {
        let is_directory = match e.file_type() {
            // Cheap path: the entry already tells us what it is.
            Ok(file_type) if !file_type.is_symlink() => file_type.is_dir(),
            // Symlink or unreadable file type: ask about the link target so
            // that a symlink to a directory counts as a directory.
            _ => fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false),
        };
        Self { path, is_directory }
    }

    /// Wraps `path`, querying the filesystem for the directory flag.
    ///
    /// A path whose metadata cannot be read is treated as "not a directory".
    fn from_path(path: PathBuf) -> Self {
        let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false);
        Self { path, is_directory }
    }

    /// Consumes the wrapper and returns the underlying path.
    fn into_path(self) -> PathBuf {
        self.path
    }
}

/// Lets a `PathWrapper` be used wherever `Path` methods are called.
impl Deref for PathWrapper {
    type Target = Path;

    fn deref(&self) -> &Self::Target {
        self.path.as_path()
    }
}

/// Lets a `PathWrapper` be passed to APIs taking `impl AsRef<Path>`.
impl AsRef<Path> for PathWrapper {
    fn as_ref(&self) -> &Path {
        self.path.as_path()
    }
}

/// An alias for a glob iteration result.
Expand Down Expand Up @@ -363,10 +402,10 @@ impl Iterator for Paths {
// idx -1: was already checked by fill_todo, maybe path was '.' or
// '..' that we can't match here because of normalization.
if idx == !0 as usize {
if self.require_dir && !is_dir(&path) {
if self.require_dir && !path.is_directory {
continue;
}
return Some(Ok(path));
return Some(Ok(path.into_path()));
}

if self.dir_patterns[idx].is_recursive {
Expand All @@ -379,7 +418,7 @@ impl Iterator for Paths {
next += 1;
}

if is_dir(&path) {
if path.is_directory {
// the path is a directory, so it's a match

// push this directory's contents
Expand All @@ -394,7 +433,7 @@ impl Iterator for Paths {
if next == self.dir_patterns.len() - 1 {
// pattern ends in recursive pattern, so return this
// directory as a result
return Some(Ok(path));
return Some(Ok(path.into_path()));
} else {
// advanced to the next pattern for this path
idx = next + 1;
Expand Down Expand Up @@ -427,8 +466,8 @@ impl Iterator for Paths {
// *AND* its children so we don't need to check the
// children

if !self.require_dir || is_dir(&path) {
return Some(Ok(path));
if !self.require_dir || path.is_directory {
return Some(Ok(path.into_path()));
}
} else {
fill_todo(
Expand Down Expand Up @@ -817,10 +856,10 @@ impl Pattern {
// special-casing patterns to match `.` and `..`, and avoiding `readdir()`
// calls when there are no metacharacters in the pattern.
fn fill_todo(
todo: &mut Vec<Result<(PathBuf, usize), GlobError>>,
todo: &mut Vec<Result<(PathWrapper, usize), GlobError>>,
patterns: &[Pattern],
idx: usize,
path: &Path,
path: &PathWrapper,
options: MatchOptions,
) {
// convert a pattern that's just many Char(_) to a string
Expand All @@ -836,7 +875,7 @@ fn fill_todo(
Some(s)
}

let add = |todo: &mut Vec<_>, next_path: PathBuf| {
let add = |todo: &mut Vec<_>, next_path: PathWrapper| {
if idx + 1 == patterns.len() {
// We know it's good, so don't make the iterator match this path
// against the pattern again. In particular, it can't match
Expand All @@ -848,8 +887,8 @@ fn fill_todo(
};

let pattern = &patterns[idx];
let is_dir = is_dir(path);
let curdir = path == Path::new(".");
let is_dir = path.is_directory;
let curdir = path.as_ref() == Path::new(".");
match pattern_as_str(pattern) {
Some(s) => {
// This pattern component doesn't have any metacharacters, so we
Expand All @@ -863,6 +902,7 @@ fn fill_todo(
} else {
path.join(&s)
};
let next_path = PathWrapper::from_path(next_path);
if (special && is_dir)
|| (!special
&& (fs::metadata(&next_path).is_ok()
Expand All @@ -875,19 +915,21 @@ fn fill_todo(
let dirs = fs::read_dir(path).and_then(|d| {
d.map(|e| {
e.map(|e| {
if curdir {
let path = if curdir {
PathBuf::from(e.path().file_name().unwrap())
} else {
e.path()
}
};
PathWrapper::from_dir_entry(path, e)
})
})
.collect::<Result<Vec<_>, _>>()
});
match dirs {
Ok(mut children) => {
if options.require_literal_leading_dot {
children.retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with("."));
children
.retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with("."));
}
children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name()));
todo.extend(children.into_iter().map(|x| Ok((x, idx))));
Expand All @@ -900,7 +942,7 @@ fn fill_todo(
if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') {
for &special in &[".", ".."] {
if pattern.matches_with(special, options) {
add(todo, path.join(special));
add(todo, PathWrapper::from_path(path.join(special)));
}
}
}
Expand Down

0 comments on commit ccac2b3

Please sign in to comment.