Skip to content

Commit

Permalink
fix(turborepo): avoid globbing directories due to ancestor truncation (
Browse files Browse the repository at this point in the history
…#5273)

Co-authored-by: Greg Soltis <Greg Soltis>
Co-authored-by: Chris Olszewski <chris.olszewski@vercel.com>
Co-authored-by: Nicholas Yang <nicholas.yang@vercel.com>
  • Loading branch information
3 people authored Jun 13, 2023
1 parent e323407 commit c97911b
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 40 deletions.
17 changes: 4 additions & 13 deletions crates/turborepo-ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod lockfile;

use std::{collections::HashMap, mem::ManuallyDrop, path::PathBuf};

use globwalk::{globwalk, WalkError};
use globwalk::globwalk;
pub use lockfile::{patches, subgraph, transitive_closure};
use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf};
use turborepo_env::EnvironmentVariableMap;
Expand Down Expand Up @@ -483,13 +483,13 @@ pub extern "C" fn glob(buffer: Buffer) -> Buffer {
false => globwalk::WalkType::All,
};

let iter = match globwalk(
let files = match globwalk(
&AbsoluteSystemPathBuf::new(req.base_path).expect("absolute"),
&req.include_patterns,
&req.exclude_patterns,
walk_type,
) {
Ok(iter) => iter,
Ok(files) => files,
Err(err) => {
let resp = proto::GlobResp {
response: Some(proto::glob_resp::Response::Error(err.to_string())),
Expand All @@ -498,17 +498,8 @@ pub extern "C" fn glob(buffer: Buffer) -> Buffer {
}
};

let paths = match iter.collect::<Result<Vec<_>, WalkError>>() {
Ok(paths) => paths,
Err(err) => {
let resp = proto::GlobResp {
response: Some(proto::glob_resp::Response::Error(err.to_string())),
};
return resp.into();
}
};
// TODO: is to_string_lossy the right thing to do here? We could error...
let files: Vec<_> = paths
let files: Vec<_> = files
.into_iter()
.map(|path| path.to_string_lossy().to_string())
.collect();
Expand Down
108 changes: 90 additions & 18 deletions crates/turborepo-globwalk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ mod empty_glob;

use std::{
borrow::Cow,
collections::HashSet,
io::ErrorKind,
path::{Path, PathBuf},
};

use empty_glob::InclusiveEmptyAny;
use itertools::Itertools;
use path_slash::PathExt;
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf};
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError};
use wax::{Any, BuildError, Glob, Pattern};

#[derive(Debug, PartialEq, Clone, Copy)]
Expand Down Expand Up @@ -50,6 +51,12 @@ pub enum WalkError {
InvalidPath,
#[error("walk error: {0}")]
WalkError(#[from] walkdir::Error),
#[error(transparent)]
Path(#[from] PathError),
#[error(transparent)]
WaxWalk(#[from] wax::WalkError),
#[error("Internal error on glob {glob}: {error}")]
InternalError { glob: String, error: String },
}

/// Performs a glob walk, yielding paths that _are_ included in the include list
Expand All @@ -62,7 +69,7 @@ pub enum WalkError {
/// - collapse the path, and calculate the new base_path, which defined as
/// the longest common prefix of all the includes
/// - traversing above the root of the base_path is not allowed
pub fn globwalk(
pub fn _globwalk(
base_path: &AbsoluteSystemPath,
include: &[String],
exclude: &[String],
Expand Down Expand Up @@ -277,6 +284,74 @@ fn collapse_path(path: &str) -> Option<(Cow<str>, usize)> {
}
}

pub fn globwalk(
base_path: &AbsoluteSystemPath,
include: &[String],
exclude: &[String],
walk_type: WalkType,
) -> Result<HashSet<AbsoluteSystemPathBuf>, WalkError> {
let (base_path_new, include_paths, exclude_paths) =
preprocess_paths_and_globs(base_path, include, exclude)?;
let inc_patterns = include_paths
.iter()
.map(|g| Glob::new(g.as_str()).map_err(|e| e.into()))
.collect::<Result<Vec<_>, WalkError>>()?;
let ex_patterns = exclude_paths
.iter()
.map(|g| Glob::new(g.as_str()))
.collect::<Result<Vec<_>, _>>()?;

let result = inc_patterns
.into_iter()
.flat_map(|glob| {
// Check if the glob specifies an exact filename with no meta characters.
if let Some(prefix) = glob.variance().path() {
// We expect all of our globs to be absolute paths (asserted above)
assert!(prefix.is_absolute(), "Found relative glob path {}", glob);
// We're either going to return this path or nothing. Check if it's a directory
// and if we want directories
match AbsoluteSystemPathBuf::new(prefix).and_then(|path| {
let metadata = path.symlink_metadata()?;
Ok((path, metadata))
}) {
Err(e) if e.is_io_error(ErrorKind::NotFound) => {
// If the file doesn't exist, it's not an error, there's just nothing to
// glob
vec![]
}
Err(e) => vec![Err(e.into())],
Ok((_, md)) if walk_type == WalkType::Files && md.is_dir() => {
vec![]
}
Ok((path, _)) => vec![Ok(path)],
}
} else {
glob.walk(&base_path_new)
.not(ex_patterns.iter().cloned())
// Per docs, only fails if exclusion list is too large, since we're using
// pre-compiled globs
.unwrap_or_else(|e| {
panic!(
"Failed to compile exclusion globs: {:?}: {}",
ex_patterns, e,
)
})
.filter_map(|entry| match entry {
Ok(entry) if walk_type == WalkType::Files && entry.file_type().is_dir() => {
None
}
Ok(entry) => {
Some(AbsoluteSystemPathBuf::new(entry.path()).map_err(|e| e.into()))
}
Err(e) => Some(Err(e.into())),
})
.collect::<Vec<_>>()
}
})
.collect::<Result<HashSet<_>, WalkError>>()?;
Ok(result)
}

#[cfg(test)]
mod test {
use std::{collections::HashSet, path::Path};
Expand Down Expand Up @@ -512,7 +587,9 @@ mod test {
#[test_case("**/*.txt", 1, 1 => matches None)]
#[test_case("**/【*", 1, 1 => matches None)]
// in the go implementation, broken-symlink is yielded,
// however in symlink mode, walkdir yields broken symlinks as errors
// however in symlink mode, walkdir yields broken symlinks as errors.
// Note that walkdir _always_ follows root symlinks. We handle this in the layer
// above wax.
#[test_case("broken-symlink", 1, 1 => matches None ; "broken symlinks should be yielded")]
// globs that match across a symlink should not follow the symlink
#[test_case("working-symlink/c/*", 0, 0 => matches None ; "working symlink should not be followed")]
Expand Down Expand Up @@ -556,11 +633,10 @@ mod test {
let dir = setup();

let path = AbsoluteSystemPathBuf::new(dir.path()).unwrap();
let (success, _error): (Vec<AbsoluteSystemPathBuf>, Vec<_>) =
match super::globwalk(&path, &[pattern.into()], &[], crate::WalkType::All) {
Ok(e) => e.into_iter().partition_result(),
Err(e) => return Some(e),
};
let success = match super::globwalk(&path, &[pattern.into()], &[], crate::WalkType::All) {
Ok(e) => e.into_iter(),
Err(e) => return Some(e),
};

assert_eq!(
success.len(),
Expand Down Expand Up @@ -1128,10 +1204,7 @@ mod test {
(crate::WalkType::Files, expected_files),
(crate::WalkType::All, expected),
] {
let (success, _): (Vec<AbsoluteSystemPathBuf>, Vec<_>) =
super::globwalk(&path, &include, &exclude, walk_type)
.unwrap()
.partition_result();
let success = super::globwalk(&path, &include, &exclude, walk_type).unwrap();

let success = success
.iter()
Expand Down Expand Up @@ -1220,12 +1293,11 @@ mod test {
let child = root.join_component("child");
let include = &["../*-file".to_string()];
let exclude = &[];
let iter = globwalk(&child, include, exclude, WalkType::Files).unwrap();
let iter = globwalk(&child, include, exclude, WalkType::Files)
.unwrap()
.into_iter();
let results = iter
.map(|entry| {
let entry = entry.unwrap();
root.anchor(entry).unwrap().to_str().unwrap().to_string()
})
.map(|entry| root.anchor(entry).unwrap().to_str().unwrap().to_string())
.collect::<Vec<_>>();
let expected = vec!["root-file".to_string()];
assert_eq!(results, expected);
Expand All @@ -1250,8 +1322,8 @@ mod test {
let exclude = &["apps/ignored".to_string(), "**/node_modules/**".to_string()];
let iter = globwalk(&root, include, exclude, WalkType::Files).unwrap();
let paths = iter
.into_iter()
.map(|path| {
let path = path.unwrap();
let relative = root.anchor(path).unwrap();
relative.to_str().unwrap().to_string()
})
Expand Down
9 changes: 3 additions & 6 deletions crates/turborepo-lib/src/package_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,19 +394,16 @@ impl PackageManager {
pub fn get_package_jsons(
&self,
repo_root: &AbsoluteSystemPath,
) -> Result<Vec<AbsoluteSystemPathBuf>, Error> {
) -> Result<impl Iterator<Item = AbsoluteSystemPathBuf>, Error> {
let globs = self.get_workspace_globs(repo_root)?;

let walker = globwalk::globwalk(
let files = globwalk::globwalk(
repo_root,
&globs.package_json_inclusions,
&globs.raw_exclusions,
globwalk::WalkType::Files,
)?;
let items = walker
.map(|result| result.map_err(|e| e.into()))
.collect::<Result<Vec<_>, Error>>()?;
Ok(items)
Ok(files.into_iter())
}
}

Expand Down
6 changes: 3 additions & 3 deletions crates/turborepo-scm/src/package_deps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ pub fn get_package_file_hashes_from_inputs<S: AsRef<str>>(
Either::Left([package_unix_path, raw_glob.as_ref()].join("/"))
}
});
let iter = globwalk::globwalk(
let files = globwalk::globwalk(
turbo_root,
&inclusions,
&exclusions,
globwalk::WalkType::Files,
)?;
let to_hash = iter
let to_hash = files
.iter()
.map(|entry| {
let entry = entry?;
let path = git_root.anchor(entry)?.to_unix()?;
Ok(path)
})
Expand Down
41 changes: 41 additions & 0 deletions crates/turborepo-wax/src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,15 @@ macro_rules! walk {
.strip_prefix(&$state.prefix)
.expect("path is not in tree");
let depth = entry.depth().saturating_sub(1);
// Globs don't include the root token, but absolute paths do.
// Skip that token so that matching up components will work below.
for candidate in path
.components()
.filter(|c| !matches!(c, Component::RootDir))
.skip(depth)
.filter_map(|component| match component {
Component::Normal(component) => Some(CandidatePath::from(component)),
Component::Prefix(component) => Some(CandidatePath::from(component.as_os_str())),
_ => None,
})
.zip_longest($state.components.iter().skip(depth))
Expand Down Expand Up @@ -570,9 +574,22 @@ pub struct Walk<'g> {
root: PathBuf,
prefix: PathBuf,
walk: walkdir::IntoIter,
// This is a hack to express an empty iterator
is_empty: bool,
}

impl<'g> Walk<'g> {
fn empty() -> Self {
Self {
pattern: Cow::Owned(Regex::new("").unwrap()),
components: vec![],
root: PathBuf::new(),
prefix: PathBuf::new(),
walk: walkdir::WalkDir::new(PathBuf::new()).into_iter(),
is_empty: true,
}
}

fn compile<'t, I>(tokens: I) -> Result<Vec<Regex>, CompileError>
where
I: IntoIterator<Item = &'t Token<'t>>,
Expand Down Expand Up @@ -602,13 +619,15 @@ impl<'g> Walk<'g> {
root,
prefix,
walk,
is_empty,
} = self;
Walk {
pattern: Cow::Owned(pattern.into_owned()),
components,
root,
prefix,
walk,
is_empty,
}
}

Expand Down Expand Up @@ -704,6 +723,9 @@ impl Iterator for Walk<'_> {
type Item = WalkItem<'static>;

fn next(&mut self) -> Option<Self::Item> {
if self.is_empty {
return None;
}
walk!(self => |entry| {
return Some(entry.map(WalkEntry::into_owned));
});
Expand Down Expand Up @@ -907,6 +929,24 @@ pub fn walk<'g>(
}
},
);
if matches!(link, LinkBehavior::ReadFile) {
if let Ok(tail) = root.strip_prefix(directory) {
let found = tail
.components()
.try_fold(directory.to_path_buf(), |accum, c| {
let candidate = accum.join(c);
if candidate.is_symlink() {
None
} else {
Some(candidate)
}
})
.is_none();
if found {
return Walk::empty();
}
}
}
let components =
Walk::compile(glob.tree.as_ref().tokens()).expect("failed to compile glob sub-expressions");
Walk {
Expand All @@ -921,6 +961,7 @@ pub fn walk<'g>(
})
.max_depth(depth)
.into_iter(),
is_empty: false,
}
}

Expand Down

0 comments on commit c97911b

Please sign in to comment.