diff --git a/crates/turborepo-ffi/src/lib.rs b/crates/turborepo-ffi/src/lib.rs index b5db90dde9d6a..996c02d502439 100644 --- a/crates/turborepo-ffi/src/lib.rs +++ b/crates/turborepo-ffi/src/lib.rs @@ -6,7 +6,7 @@ mod lockfile; use std::{collections::HashMap, mem::ManuallyDrop, path::PathBuf}; -use globwalk::{globwalk, WalkError}; +use globwalk::globwalk; pub use lockfile::{patches, subgraph, transitive_closure}; use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf}; use turborepo_env::EnvironmentVariableMap; @@ -483,13 +483,13 @@ pub extern "C" fn glob(buffer: Buffer) -> Buffer { false => globwalk::WalkType::All, }; - let iter = match globwalk( + let files = match globwalk( &AbsoluteSystemPathBuf::new(req.base_path).expect("absolute"), &req.include_patterns, &req.exclude_patterns, walk_type, ) { - Ok(iter) => iter, + Ok(files) => files, Err(err) => { let resp = proto::GlobResp { response: Some(proto::glob_resp::Response::Error(err.to_string())), @@ -498,17 +498,8 @@ pub extern "C" fn glob(buffer: Buffer) -> Buffer { } }; - let paths = match iter.collect::, WalkError>>() { - Ok(paths) => paths, - Err(err) => { - let resp = proto::GlobResp { - response: Some(proto::glob_resp::Response::Error(err.to_string())), - }; - return resp.into(); - } - }; // TODO: is to_string_lossy the right thing to do here? We could error... - let files: Vec<_> = paths + let files: Vec<_> = files .into_iter() .map(|path| path.to_string_lossy().to_string()) .collect(); diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index 6d1f2d040fe7f..864fdefc02479 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -4,6 +4,7 @@ mod empty_glob; use std::{ borrow::Cow, + collections::HashSet, io::ErrorKind, path::{Path, PathBuf}, }; @@ -11,7 +12,7 @@ use std::{ use empty_glob::InclusiveEmptyAny; use itertools::Itertools; use path_slash::PathExt; -use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf}; +use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError}; use wax::{Any, BuildError, Glob, Pattern}; #[derive(Debug, PartialEq, Clone, Copy)] @@ -50,6 +51,12 @@ pub enum WalkError { InvalidPath, #[error("walk error: {0}")] WalkError(#[from] walkdir::Error), + #[error(transparent)] + Path(#[from] PathError), + #[error(transparent)] + WaxWalk(#[from] wax::WalkError), + #[error("Internal error on glob {glob}: {error}")] + InternalError { glob: String, error: String }, } /// Performs a glob walk, yielding paths that _are_ included in the include list @@ -62,7 +69,7 @@ pub enum WalkError { /// - collapse the path, and calculate the new base_path, which defined as /// the longest common prefix of all the includes /// - traversing above the root of the base_path is not allowed -pub fn globwalk( +pub fn _globwalk( base_path: &AbsoluteSystemPath, include: &[String], exclude: &[String], @@ -277,6 +284,74 @@ fn collapse_path(path: &str) -> Option<(Cow, usize)> { } } +pub fn globwalk( + base_path: &AbsoluteSystemPath, + include: &[String], + exclude: &[String], + walk_type: WalkType, +) -> Result, WalkError> { + let (base_path_new, include_paths, exclude_paths) = + preprocess_paths_and_globs(base_path, include, exclude)?; + let inc_patterns = include_paths + .iter() + .map(|g| Glob::new(g.as_str()).map_err(|e| e.into())) + .collect::, WalkError>>()?; + let ex_patterns = exclude_paths + .iter() + .map(|g| Glob::new(g.as_str())) + .collect::, _>>()?; + + let result = inc_patterns + .into_iter() + .flat_map(|glob| { + // Check if the glob specifies an exact filename with no meta characters. + if let Some(prefix) = glob.variance().path() { + // We expect all of our globs to be absolute paths (asserted above) + assert!(prefix.is_absolute(), "Found relative glob path {}", glob); + // We're either going to return this path or nothing. Check if it's a directory + // and if we want directories + match AbsoluteSystemPathBuf::new(prefix).and_then(|path| { + let metadata = path.symlink_metadata()?; + Ok((path, metadata)) + }) { + Err(e) if e.is_io_error(ErrorKind::NotFound) => { + // If the file doesn't exist, it's not an error, there's just nothing to + // glob + vec![] + } + Err(e) => vec![Err(e.into())], + Ok((_, md)) if walk_type == WalkType::Files && md.is_dir() => { + vec![] + } + Ok((path, _)) => vec![Ok(path)], + } + } else { + glob.walk(&base_path_new) + .not(ex_patterns.iter().cloned()) + // Per docs, only fails if exclusion list is too large, since we're using + // pre-compiled globs + .unwrap_or_else(|e| { + panic!( + "Failed to compile exclusion globs: {:?}: {}", + ex_patterns, e, + ) + }) + .filter_map(|entry| match entry { + Ok(entry) if walk_type == WalkType::Files && entry.file_type().is_dir() => { + None + } + Ok(entry) => { + Some(AbsoluteSystemPathBuf::new(entry.path()).map_err(|e| e.into())) + } + Err(e) => Some(Err(e.into())), + }) + .collect::>() + } + }) + .collect::, WalkError>>()?; + Ok(result) +} + #[cfg(test)] mod test { use std::{collections::HashSet, path::Path}; @@ -512,7 +587,9 @@ mod test { #[test_case("**/*.txt", 1, 1 => matches None)] #[test_case("**/【*", 1, 1 => matches None)] // in the go implementation, broken-symlink is yielded, - // however in symlink mode, walkdir yields broken symlinks as errors + // however in symlink mode, walkdir yields broken symlinks as errors. + // Note that walkdir _always_ follows root symlinks. We handle this in the layer + // above wax. #[test_case("broken-symlink", 1, 1 => matches None ; "broken symlinks should be yielded")] // globs that match across a symlink should not follow the symlink #[test_case("working-symlink/c/*", 0, 0 => matches None ; "working symlink should not be followed")] @@ -556,11 +633,10 @@ mod test { let dir = setup(); let path = AbsoluteSystemPathBuf::new(dir.path()).unwrap(); - let (success, _error): (Vec, Vec<_>) = - match super::globwalk(&path, &[pattern.into()], &[], crate::WalkType::All) { - Ok(e) => e.into_iter().partition_result(), - Err(e) => return Some(e), - }; + let success = match super::globwalk(&path, &[pattern.into()], &[], crate::WalkType::All) { + Ok(e) => e.into_iter(), + Err(e) => return Some(e), + }; assert_eq!( success.len(), @@ -1128,10 +1204,7 @@ mod test { (crate::WalkType::Files, expected_files), (crate::WalkType::All, expected), ] { - let (success, _): (Vec, Vec<_>) = - super::globwalk(&path, &include, &exclude, walk_type) - .unwrap() - .partition_result(); + let success = super::globwalk(&path, &include, &exclude, walk_type).unwrap(); let success = success .iter() @@ -1220,12 +1293,11 @@ mod test { let child = root.join_component("child"); let include = &["../*-file".to_string()]; let exclude = &[]; - let iter = globwalk(&child, include, exclude, WalkType::Files).unwrap(); + let iter = globwalk(&child, include, exclude, WalkType::Files) + .unwrap() + .into_iter(); let results = iter - .map(|entry| { - let entry = entry.unwrap(); - root.anchor(entry).unwrap().to_str().unwrap().to_string() - }) + .map(|entry| root.anchor(entry).unwrap().to_str().unwrap().to_string()) .collect::>(); let expected = vec!["root-file".to_string()]; assert_eq!(results, expected); @@ -1250,8 +1322,8 @@ mod test { let exclude = &["apps/ignored".to_string(), "**/node_modules/**".to_string()]; let iter = globwalk(&root, include, exclude, WalkType::Files).unwrap(); let paths = iter + .into_iter() .map(|path| { - let path = path.unwrap(); let relative = root.anchor(path).unwrap(); relative.to_str().unwrap().to_string() }) diff --git a/crates/turborepo-lib/src/package_manager/mod.rs b/crates/turborepo-lib/src/package_manager/mod.rs index baf5c2eafe25b..1eecb2e62775e 100644 --- a/crates/turborepo-lib/src/package_manager/mod.rs +++ b/crates/turborepo-lib/src/package_manager/mod.rs @@ -394,19 +394,16 @@ impl PackageManager { pub fn get_package_jsons( &self, repo_root: &AbsoluteSystemPath, - ) -> Result, Error> { + ) -> Result, Error> { let globs = self.get_workspace_globs(repo_root)?; - let walker = globwalk::globwalk( + let files = globwalk::globwalk( repo_root, &globs.package_json_inclusions, &globs.raw_exclusions, globwalk::WalkType::Files, )?; - let items = walker - .map(|result| result.map_err(|e| e.into())) - .collect::, Error>>()?; - Ok(items) + Ok(files.into_iter()) } } diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index 2f4422b1c8886..782d9abb99a1b 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -65,15 +65,15 @@ pub fn get_package_file_hashes_from_inputs>( Either::Left([package_unix_path, raw_glob.as_ref()].join("/")) } }); - let iter = globwalk::globwalk( + let files = globwalk::globwalk( turbo_root, &inclusions, &exclusions, globwalk::WalkType::Files, )?; - let to_hash = iter + let to_hash = files + .iter() .map(|entry| { - let entry = entry?; let path = git_root.anchor(entry)?.to_unix()?; Ok(path) }) diff --git a/crates/turborepo-wax/src/walk.rs b/crates/turborepo-wax/src/walk.rs index 597618c7df1a3..af6ac8dccef03 100644 --- a/crates/turborepo-wax/src/walk.rs +++ b/crates/turborepo-wax/src/walk.rs @@ -143,11 +143,15 @@ macro_rules! walk { .strip_prefix(&$state.prefix) .expect("path is not in tree"); let depth = entry.depth().saturating_sub(1); + // Globs don't include the root token, but absolute paths do. + // Skip that token so that matching up components will work below. for candidate in path .components() + .filter(|c| !matches!(c, Component::RootDir)) .skip(depth) .filter_map(|component| match component { Component::Normal(component) => Some(CandidatePath::from(component)), + Component::Prefix(component) => Some(CandidatePath::from(component.as_os_str())), _ => None, }) .zip_longest($state.components.iter().skip(depth)) @@ -570,9 +574,22 @@ pub struct Walk<'g> { root: PathBuf, prefix: PathBuf, walk: walkdir::IntoIter, + // This is a hack to express an empty iterator + is_empty: bool, } impl<'g> Walk<'g> { + fn empty() -> Self { + Self { + pattern: Cow::Owned(Regex::new("").unwrap()), + components: vec![], + root: PathBuf::new(), + prefix: PathBuf::new(), + walk: walkdir::WalkDir::new(PathBuf::new()).into_iter(), + is_empty: true, + } + } + fn compile<'t, I>(tokens: I) -> Result, CompileError> where I: IntoIterator>, @@ -602,6 +619,7 @@ impl<'g> Walk<'g> { root, prefix, walk, + is_empty, } = self; Walk { pattern: Cow::Owned(pattern.into_owned()), @@ -609,6 +627,7 @@ impl<'g> Walk<'g> { root, prefix, walk, + is_empty, } } @@ -704,6 +723,9 @@ impl Iterator for Walk<'_> { type Item = WalkItem<'static>; fn next(&mut self) -> Option { + if self.is_empty { + return None; + } walk!(self => |entry| { return Some(entry.map(WalkEntry::into_owned)); }); @@ -907,6 +929,24 @@ pub fn walk<'g>( } }, ); + if matches!(link, LinkBehavior::ReadFile) { + if let Ok(tail) = root.strip_prefix(directory) { + let found = tail + .components() + .try_fold(directory.to_path_buf(), |accum, c| { + let candidate = accum.join(c); + if candidate.is_symlink() { + None + } else { + Some(candidate) + } + }) + .is_none(); + if found { + return Walk::empty(); + } + } + } let components = Walk::compile(glob.tree.as_ref().tokens()).expect("failed to compile glob sub-expressions"); Walk { @@ -921,6 +961,7 @@ pub fn walk<'g>( }) .max_depth(depth) .into_iter(), + is_empty: false, } }