From 1b3b358161f8b62a9e3fd83091369dd1c09f8bce Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 8 Jun 2018 18:49:43 -0700 Subject: [PATCH 1/6] fix some comments --- src/rust/engine/fs/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/engine/fs/src/lib.rs b/src/rust/engine/fs/src/lib.rs index 2fea77fccdc..11c5a123d12 100644 --- a/src/rust/engine/fs/src/lib.rs +++ b/src/rust/engine/fs/src/lib.rs @@ -978,7 +978,8 @@ pub trait VFS: Clone + Send + Sync + 'static { // because different `DirWildcard`s can potentially expand (transitively) to the same // intermediate glob. The "parents" of each glob are stored in the `sources` field of its // `GlobExpansionCacheEntry` (which is mutably updated with any new parents on each - // iteration of the loop_fn above). This can be considered "amortized" and/or "memoized". + // iteration of the loop_fn above). This can be considered "amortized" and/or "memoized", + // because we only traverse every parent -> child link once. let new_matched_source_globs = match completed.get(&cur_glob).unwrap() { &GlobExpansionCacheEntry { ref matched, @@ -1032,7 +1033,6 @@ pub trait VFS: Clone + Send + Sync + 'static { if strict_match_behavior.should_throw_on_error() { return future::err(Self::mk_error(&msg)); } else { - // FIXME(#5683): warn!() doesn't seem to do anything? // TODO(#5683): this doesn't have any useful context (the stack trace) without // being thrown -- this needs to be provided, otherwise this is unusable. warn!("{}", msg); From 438a0c13e257db816c16181f4cd9a5a44cbab33f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 9 Jun 2018 15:52:50 -0700 Subject: [PATCH 2/6] move glob matching into its own file --- src/rust/engine/fs/src/glob_matching.rs | 397 ++++++++++++++++++++++++ src/rust/engine/fs/src/lib.rs | 372 +--------------------- src/rust/engine/src/nodes.rs | 2 +- src/rust/engine/src/scheduler.rs | 2 +- 4 files changed, 401 insertions(+), 372 deletions(-) create mode 100644 src/rust/engine/fs/src/glob_matching.rs diff --git a/src/rust/engine/fs/src/glob_matching.rs b/src/rust/engine/fs/src/glob_matching.rs new file mode 100644 index 00000000000..507ad239703 --- /dev/null +++ b/src/rust/engine/fs/src/glob_matching.rs @@ -0,0 +1,397 @@ +// Copyright 2018 Pants project contributors (see CONTRIBUTORS.md). +// Licensed under the Apache License, Version 2.0 (see LICENSE). + +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use boxfuture::{BoxFuture, Boxable}; +use futures::Future; +use futures::future; +use glob::Pattern; +use indexmap::{IndexMap, IndexSet, map::Entry::Occupied}; + +use {Dir, GitignoreStyleExcludes, GlobParsedSource, GlobSource, GlobWithSource, Link, PathGlob, + PathGlobs, PathStat, Stat, VFS}; + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +enum GlobMatch { + SuccessfullyMatchedSomeFiles, + DidNotMatchAnyFiles, +} + +#[derive(Debug)] +struct GlobExpansionCacheEntry { + globs: Vec, + matched: GlobMatch, + sources: Vec, +} + +#[derive(Debug)] +struct SingleExpansionResult { + sourced_glob: GlobWithSource, + path_stats: Vec, + globs: Vec, +} + +#[derive(Debug)] +struct PathGlobsExpansion { + context: T, + // Globs that have yet to be expanded, in order. + todo: Vec, + // Paths to exclude. + exclude: Arc, + // Globs that have already been expanded. + completed: IndexMap, + // Unique Paths that have been matched, in order. + outputs: IndexSet, +} + +trait GlobMatchingImplementation: VFS { + fn directory_listing( + &self, + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + exclude: &Arc, + ) -> BoxFuture, E> { + // List the directory. + let context = self.clone(); + let exclude = exclude.clone(); + + self + .scandir(canonical_dir) + .and_then(move |dir_listing| { + // Match any relevant Stats, and join them into PathStats. + future::join_all( + dir_listing + .into_iter() + .filter(|stat| { + // Match relevant filenames. + stat + .path() + .file_name() + .map(|file_name| wildcard.matches_path(Path::new(file_name))) + .unwrap_or(false) + }) + .filter_map(|stat| { + // Append matched filenames. + stat + .path() + .file_name() + .map(|file_name| symbolic_path.join(file_name)) + .map(|symbolic_stat_path| (symbolic_stat_path, stat)) + }) + .map(|(stat_symbolic_path, stat)| { + // Canonicalize matched PathStats, and filter paths that are ignored by either the + // context, or by local excludes. Note that we apply context ignore patterns to both + // the symbolic and canonical names of Links, but only apply local excludes to their + // symbolic names. + if context.is_ignored(&stat) || exclude.is_ignored(&stat) { + future::ok(None).to_boxed() + } else { + match stat { + Stat::Link(l) => context.canonicalize(stat_symbolic_path, l), + Stat::Dir(d) => { + future::ok(Some(PathStat::dir(stat_symbolic_path.to_owned(), d))).to_boxed() + } + Stat::File(f) => { + future::ok(Some(PathStat::file(stat_symbolic_path.to_owned(), f))).to_boxed() + } + } + } + }) + .collect::>(), + ) + }) + .map(|path_stats| { + // See the TODO above. + path_stats.into_iter().filter_map(|pso| pso).collect() + }) + .to_boxed() + } + + /// + /// Recursively expands PathGlobs into PathStats while applying excludes. + /// + fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { + let PathGlobs { + include, + exclude, + strict_match_behavior, + } = path_globs; + + if include.is_empty() { + return future::ok(vec![]).to_boxed(); + } + + let init = PathGlobsExpansion { + context: self.clone(), + todo: include + .iter() + .flat_map(|entry| entry.to_sourced_globs()) + .collect(), + exclude, + completed: IndexMap::default(), + outputs: IndexSet::default(), + }; + future::loop_fn(init, |mut expansion| { + // Request the expansion of all outstanding PathGlobs as a batch. + let round = future::join_all({ + let exclude = &expansion.exclude; + let context = &expansion.context; + expansion + .todo + .drain(..) + .map(|sourced_glob| context.expand_single(sourced_glob, exclude)) + .collect::>() + }); + round.map(move |single_expansion_results| { + // Collect distinct new PathStats and PathGlobs + for exp in single_expansion_results { + let SingleExpansionResult { + sourced_glob: GlobWithSource { path_glob, source }, + path_stats, + globs, + } = exp; + + expansion.outputs.extend(path_stats.clone()); + + expansion + .completed + .entry(path_glob.clone()) + .or_insert_with(|| GlobExpansionCacheEntry { + globs: globs.clone(), + matched: if path_stats.is_empty() { + GlobMatch::DidNotMatchAnyFiles + } else { + GlobMatch::SuccessfullyMatchedSomeFiles + }, + sources: vec![], + }) + .sources + .push(source); + + // Do we need to worry about cloning for all these `GlobSource`s (each containing a + // `PathGlob`)? + let source_for_children = GlobSource::ParentGlob(path_glob); + for child_glob in globs { + if let Occupied(mut entry) = expansion.completed.entry(child_glob.clone()) { + entry.get_mut().sources.push(source_for_children.clone()); + } else { + expansion.todo.push(GlobWithSource { + path_glob: child_glob, + source: source_for_children.clone(), + }); + } + } + } + + // If there were any new PathGlobs, continue the expansion. + if expansion.todo.is_empty() { + future::Loop::Break(expansion) + } else { + future::Loop::Continue(expansion) + } + }) + }).and_then(move |final_expansion| { + // Finally, capture the resulting PathStats from the expansion. + let PathGlobsExpansion { + outputs, + mut completed, + exclude, + .. + } = final_expansion; + + let match_results: Vec<_> = outputs.into_iter().collect(); + + if strict_match_behavior.should_check_glob_matches() { + // Each `GlobExpansionCacheEntry` stored in `completed` for some `PathGlob` has the field + // `matched` to denote whether that specific `PathGlob` matched any files. We propagate a + // positive `matched` condition to all transitive "parents" of any glob which expands to + // some non-empty set of `PathStat`s. The `sources` field contains the parents (see the enum + // `GlobSource`), which may be another glob, or it might be a `GlobParsedSource`. We record + // all `GlobParsedSource` inputs which transitively expanded to some file here, and below we + // warn or error if some of the inputs were not found. + let mut inputs_with_matches: HashSet = HashSet::new(); + + // `completed` is an IndexMap, and we immediately insert every glob we expand into + // `completed`, recording any `PathStat`s and `PathGlob`s it expanded to (and then expanding + // those child globs in the next iteration of the loop_fn). If we iterate in + // reverse order of expansion (using .rev()), we ensure that we have already visited every + // "child" glob of the glob we are operating on while iterating. This is a reverse + // "topological ordering" which preserves the partial order from parent to child globs. + let all_globs: Vec = completed.keys().rev().map(|pg| pg.clone()).collect(); + for cur_glob in all_globs { + // Note that we talk of "parents" and "childen", but this structure is actually a DAG, + // because different `DirWildcard`s can potentially expand (transitively) to the same + // intermediate glob. The "parents" of each glob are stored in the `sources` field of its + // `GlobExpansionCacheEntry` (which is mutably updated with any new parents on each + // iteration of the loop_fn above). This can be considered "amortized" and/or "memoized", + // because we only traverse every parent -> child link once. + let new_matched_source_globs = match completed.get(&cur_glob).unwrap() { + &GlobExpansionCacheEntry { + ref matched, + ref sources, + .. + } => match matched { + // Neither this glob, nor any of its children, expanded to any `PathStat`s, so we have + // nothing to propagate. + &GlobMatch::DidNotMatchAnyFiles => vec![], + &GlobMatch::SuccessfullyMatchedSomeFiles => sources + .iter() + .filter_map(|src| match src { + // This glob matched some files, so its parent also matched some files. + &GlobSource::ParentGlob(ref path_glob) => Some(path_glob.clone()), + // We've found one of the root inputs, coming from a glob which transitively + // matched some child -- record it (this may already exist in the set). + &GlobSource::ParsedInput(ref parsed_source) => { + inputs_with_matches.insert(parsed_source.clone()); + None + } + }) + .collect(), + }, + }; + new_matched_source_globs.into_iter().for_each(|path_glob| { + // Overwrite whatever was in there before -- we now know these globs transitively + // expanded to some non-empty set of `PathStat`s. + let entry = completed.get_mut(&path_glob).unwrap(); + entry.matched = GlobMatch::SuccessfullyMatchedSomeFiles; + }); + } + + // Get all the inputs which didn't transitively expand to any files. + let non_matching_inputs: Vec = include + .into_iter() + .map(|entry| entry.input) + .filter(|parsed_source| !inputs_with_matches.contains(parsed_source)) + .collect(); + + if !non_matching_inputs.is_empty() { + // TODO(#5684): explain what global and/or target-specific option to set to + // modify this behavior! + let msg = format!( + "Globs did not match. Excludes were: {:?}. Unmatched globs were: {:?}.", + exclude.exclude_patterns(), + non_matching_inputs + .iter() + .map(|parsed_source| parsed_source.0.clone()) + .collect::>(), + ); + if strict_match_behavior.should_throw_on_error() { + return future::err(Self::mk_error(&msg)); + } else { + // TODO(#5683): this doesn't have any useful context (the stack trace) without + // being thrown -- this needs to be provided, otherwise this is unusable. + warn!("{}", msg); + } + } + } + + future::ok(match_results) + }) + .to_boxed() + } + + /// + /// Apply a PathGlob, returning PathStats and additional PathGlobs that are needed for the + /// expansion. + /// + fn expand_single( + &self, + sourced_glob: GlobWithSource, + exclude: &Arc, + ) -> BoxFuture { + match sourced_glob.path_glob.clone() { + PathGlob::Wildcard { canonical_dir, symbolic_path, wildcard } => + // Filter directory listing to return PathStats, with no continuation. + self.directory_listing(canonical_dir, symbolic_path, wildcard, exclude) + .map(move |path_stats| SingleExpansionResult { + sourced_glob, + path_stats, + globs: vec![], + }) + .to_boxed(), + PathGlob::DirWildcard { canonical_dir, symbolic_path, wildcard, remainder } => + // Filter directory listing and request additional PathGlobs for matched Dirs. + self.directory_listing(canonical_dir, symbolic_path, wildcard, exclude) + .and_then(move |path_stats| { + path_stats.into_iter() + .filter_map(|ps| match ps { + PathStat::Dir { path, stat } => + Some( + PathGlob::parse_globs(stat, path, &remainder) + .map_err(|e| Self::mk_error(e.as_str())) + ), + PathStat::File { .. } => None, + }) + .collect::, E>>() + }) + .map(move |path_globs| { + let flattened = path_globs + .into_iter() + .flat_map(|path_globs| path_globs.into_iter()) + .collect(); + SingleExpansionResult { + sourced_glob, + path_stats: vec![], + globs: flattened, + } + }) + .to_boxed(), + } + } + + /// + /// Canonicalize the Link for the given Path to an underlying File or Dir. May result + /// in None if the PathStat represents a broken Link. + /// + /// Skips ignored paths both before and after expansion. + /// + /// TODO: Should handle symlink loops (which would exhibit as an infinite loop in expand). + /// + fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { + // Read the link, which may result in PathGlob(s) that match 0 or 1 Path. + let context = self.clone(); + self + .read_link(link) + .map(|dest_path| { + // If the link destination can't be parsed as PathGlob(s), it is broken. + dest_path + .to_str() + .and_then(|dest_str| { + // Escape any globs in the parsed dest, which should guarantee one output PathGlob. + PathGlob::create(&[Pattern::escape(dest_str)]).ok() + }) + .unwrap_or_else(|| vec![]) + }) + .and_then(|link_globs| { + let new_path_globs = + future::result(PathGlobs::from_globs(link_globs)).map_err(|e| Self::mk_error(e.as_str())); + new_path_globs.and_then(move |path_globs| context.expand(path_globs)) + }) + .map(|mut path_stats| { + // Since we've escaped any globs in the parsed path, expect either 0 or 1 destination. + path_stats.pop().map(|ps| match ps { + PathStat::Dir { stat, .. } => PathStat::dir(symbolic_path, stat), + PathStat::File { stat, .. } => PathStat::file(symbolic_path, stat), + }) + }) + .to_boxed() + } +} + +impl> GlobMatchingImplementation for T {} + +pub trait GlobMatching: VFS { + fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { + GlobMatchingImplementation::canonicalize(self, symbolic_path, link) + } + + fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { + GlobMatchingImplementation::expand(self, path_globs) + } +} + +impl> GlobMatching for T {} diff --git a/src/rust/engine/fs/src/lib.rs b/src/rust/engine/fs/src/lib.rs index 11c5a123d12..f8e05068470 100644 --- a/src/rust/engine/fs/src/lib.rs +++ b/src/rust/engine/fs/src/lib.rs @@ -1,6 +1,8 @@ // Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). +mod glob_matching; +pub use glob_matching::GlobMatching; mod snapshot; pub use snapshot::{OneOffStoreFileByDigest, Snapshot, StoreFileByDigest, EMPTY_DIGEST, EMPTY_FINGERPRINT}; @@ -39,7 +41,6 @@ extern crate tempfile; extern crate testutil; use std::cmp::min; -use std::collections::HashSet; use std::io::{self, Read}; use std::os::unix::fs::PermissionsExt; use std::path::{Component, Path, PathBuf}; @@ -50,7 +51,6 @@ use bytes::Bytes; use futures::future::{self, Future}; use glob::Pattern; use ignore::gitignore::{Gitignore, GitignoreBuilder}; -use indexmap::{IndexMap, IndexSet, map::Entry::Occupied}; use boxfuture::{BoxFuture, Boxable}; @@ -474,41 +474,6 @@ pub struct GlobWithSource { source: GlobSource, } -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub enum GlobMatch { - SuccessfullyMatchedSomeFiles, - DidNotMatchAnyFiles, -} - -#[derive(Debug)] -struct GlobExpansionCacheEntry { - globs: Vec, - matched: GlobMatch, - sources: Vec, -} - -// FIXME(#5871): move glob matching to its own file so we don't need to leak this object (through -// the return type of expand_single()). -#[derive(Debug)] -pub struct SingleExpansionResult { - sourced_glob: GlobWithSource, - path_stats: Vec, - globs: Vec, -} - -#[derive(Debug)] -struct PathGlobsExpansion { - context: T, - // Globs that have yet to be expanded, in order. - todo: Vec, - // Paths to exclude. - exclude: Arc, - // Globs that have already been expanded. - completed: IndexMap, - // Unique Paths that have been matched, in order. - outputs: IndexSet, -} - /// /// All Stats consumed or return by this type are relative to the root. /// @@ -760,339 +725,6 @@ pub trait VFS: Clone + Send + Sync + 'static { fn scandir(&self, dir: Dir) -> BoxFuture, E>; fn is_ignored(&self, stat: &Stat) -> bool; fn mk_error(msg: &str) -> E; - - /// - /// Canonicalize the Link for the given Path to an underlying File or Dir. May result - /// in None if the PathStat represents a broken Link. - /// - /// Skips ignored paths both before and after expansion. - /// - /// TODO: Should handle symlink loops (which would exhibit as an infinite loop in expand). - /// - fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { - // Read the link, which may result in PathGlob(s) that match 0 or 1 Path. - let context = self.clone(); - self - .read_link(link) - .map(|dest_path| { - // If the link destination can't be parsed as PathGlob(s), it is broken. - dest_path - .to_str() - .and_then(|dest_str| { - // Escape any globs in the parsed dest, which should guarantee one output PathGlob. - PathGlob::create(&[Pattern::escape(dest_str)]).ok() - }) - .unwrap_or_else(|| vec![]) - }) - .and_then(|link_globs| { - let new_path_globs = - future::result(PathGlobs::from_globs(link_globs)).map_err(|e| Self::mk_error(e.as_str())); - new_path_globs.and_then(move |path_globs| context.expand(path_globs)) - }) - .map(|mut path_stats| { - // Since we've escaped any globs in the parsed path, expect either 0 or 1 destination. - path_stats.pop().map(|ps| match ps { - PathStat::Dir { stat, .. } => PathStat::dir(symbolic_path, stat), - PathStat::File { stat, .. } => PathStat::file(symbolic_path, stat), - }) - }) - .to_boxed() - } - - fn directory_listing( - &self, - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - exclude: &Arc, - ) -> BoxFuture, E> { - // List the directory. - let context = self.clone(); - let exclude = exclude.clone(); - - self - .scandir(canonical_dir) - .and_then(move |dir_listing| { - // Match any relevant Stats, and join them into PathStats. - future::join_all( - dir_listing - .into_iter() - .filter(|stat| { - // Match relevant filenames. - stat - .path() - .file_name() - .map(|file_name| wildcard.matches_path(Path::new(file_name))) - .unwrap_or(false) - }) - .filter_map(|stat| { - // Append matched filenames. - stat - .path() - .file_name() - .map(|file_name| symbolic_path.join(file_name)) - .map(|symbolic_stat_path| (symbolic_stat_path, stat)) - }) - .map(|(stat_symbolic_path, stat)| { - // Canonicalize matched PathStats, and filter paths that are ignored by either the - // context, or by local excludes. Note that we apply context ignore patterns to both - // the symbolic and canonical names of Links, but only apply local excludes to their - // symbolic names. - if context.is_ignored(&stat) || exclude.is_ignored(&stat) { - future::ok(None).to_boxed() - } else { - match stat { - Stat::Link(l) => context.canonicalize(stat_symbolic_path, l), - Stat::Dir(d) => { - future::ok(Some(PathStat::dir(stat_symbolic_path.to_owned(), d))).to_boxed() - } - Stat::File(f) => { - future::ok(Some(PathStat::file(stat_symbolic_path.to_owned(), f))).to_boxed() - } - } - } - }) - .collect::>(), - ) - }) - .map(|path_stats| { - // See the TODO above. - path_stats.into_iter().filter_map(|pso| pso).collect() - }) - .to_boxed() - } - - /// - /// Recursively expands PathGlobs into PathStats while applying excludes. - /// - fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { - let PathGlobs { - include, - exclude, - strict_match_behavior, - } = path_globs; - - if include.is_empty() { - return future::ok(vec![]).to_boxed(); - } - - let init = PathGlobsExpansion { - context: self.clone(), - todo: include - .iter() - .flat_map(|entry| entry.to_sourced_globs()) - .collect(), - exclude, - completed: IndexMap::default(), - outputs: IndexSet::default(), - }; - future::loop_fn(init, |mut expansion| { - // Request the expansion of all outstanding PathGlobs as a batch. - let round = future::join_all({ - let exclude = &expansion.exclude; - let context = &expansion.context; - expansion - .todo - .drain(..) - .map(|sourced_glob| context.expand_single(sourced_glob, exclude)) - .collect::>() - }); - round.map(move |single_expansion_results| { - // Collect distinct new PathStats and PathGlobs - for exp in single_expansion_results { - let SingleExpansionResult { - sourced_glob: GlobWithSource { path_glob, source }, - path_stats, - globs, - } = exp; - - expansion.outputs.extend(path_stats.clone()); - - expansion - .completed - .entry(path_glob.clone()) - .or_insert_with(|| GlobExpansionCacheEntry { - globs: globs.clone(), - matched: if path_stats.is_empty() { - GlobMatch::DidNotMatchAnyFiles - } else { - GlobMatch::SuccessfullyMatchedSomeFiles - }, - sources: vec![], - }) - .sources - .push(source); - - // Do we need to worry about cloning for all these `GlobSource`s (each containing a - // `PathGlob`)? - let source_for_children = GlobSource::ParentGlob(path_glob); - for child_glob in globs { - if let Occupied(mut entry) = expansion.completed.entry(child_glob.clone()) { - entry.get_mut().sources.push(source_for_children.clone()); - } else { - expansion.todo.push(GlobWithSource { - path_glob: child_glob, - source: source_for_children.clone(), - }); - } - } - } - - // If there were any new PathGlobs, continue the expansion. - if expansion.todo.is_empty() { - future::Loop::Break(expansion) - } else { - future::Loop::Continue(expansion) - } - }) - }).and_then(move |final_expansion| { - // Finally, capture the resulting PathStats from the expansion. - let PathGlobsExpansion { - outputs, - mut completed, - exclude, - .. - } = final_expansion; - - let match_results: Vec<_> = outputs.into_iter().collect(); - - if strict_match_behavior.should_check_glob_matches() { - // Each `GlobExpansionCacheEntry` stored in `completed` for some `PathGlob` has the field - // `matched` to denote whether that specific `PathGlob` matched any files. We propagate a - // positive `matched` condition to all transitive "parents" of any glob which expands to - // some non-empty set of `PathStat`s. The `sources` field contains the parents (see the enum - // `GlobSource`), which may be another glob, or it might be a `GlobParsedSource`. We record - // all `GlobParsedSource` inputs which transitively expanded to some file here, and below we - // warn or error if some of the inputs were not found. - let mut inputs_with_matches: HashSet = HashSet::new(); - - // `completed` is an IndexMap, and we immediately insert every glob we expand into - // `completed`, recording any `PathStat`s and `PathGlob`s it expanded to (and then expanding - // those child globs in the next iteration of the loop_fn). If we iterate in - // reverse order of expansion (using .rev()), we ensure that we have already visited every - // "child" glob of the glob we are operating on while iterating. This is a reverse - // "topological ordering" which preserves the partial order from parent to child globs. - let all_globs: Vec = completed.keys().rev().map(|pg| pg.clone()).collect(); - for cur_glob in all_globs { - // Note that we talk of "parents" and "childen", but this structure is actually a DAG, - // because different `DirWildcard`s can potentially expand (transitively) to the same - // intermediate glob. The "parents" of each glob are stored in the `sources` field of its - // `GlobExpansionCacheEntry` (which is mutably updated with any new parents on each - // iteration of the loop_fn above). This can be considered "amortized" and/or "memoized", - // because we only traverse every parent -> child link once. - let new_matched_source_globs = match completed.get(&cur_glob).unwrap() { - &GlobExpansionCacheEntry { - ref matched, - ref sources, - .. - } => match matched { - // Neither this glob, nor any of its children, expanded to any `PathStat`s, so we have - // nothing to propagate. - &GlobMatch::DidNotMatchAnyFiles => vec![], - &GlobMatch::SuccessfullyMatchedSomeFiles => sources - .iter() - .filter_map(|src| match src { - // This glob matched some files, so its parent also matched some files. - &GlobSource::ParentGlob(ref path_glob) => Some(path_glob.clone()), - // We've found one of the root inputs, coming from a glob which transitively - // matched some child -- record it (this may already exist in the set). - &GlobSource::ParsedInput(ref parsed_source) => { - inputs_with_matches.insert(parsed_source.clone()); - None - } - }) - .collect(), - }, - }; - new_matched_source_globs.into_iter().for_each(|path_glob| { - // Overwrite whatever was in there before -- we now know these globs transitively - // expanded to some non-empty set of `PathStat`s. - let entry = completed.get_mut(&path_glob).unwrap(); - entry.matched = GlobMatch::SuccessfullyMatchedSomeFiles; - }); - } - - // Get all the inputs which didn't transitively expand to any files. - let non_matching_inputs: Vec = include - .into_iter() - .map(|entry| entry.input) - .filter(|parsed_source| !inputs_with_matches.contains(parsed_source)) - .collect(); - - if !non_matching_inputs.is_empty() { - // TODO(#5684): explain what global and/or target-specific option to set to - // modify this behavior! - let msg = format!( - "Globs did not match. Excludes were: {:?}. Unmatched globs were: {:?}.", - exclude.exclude_patterns(), - non_matching_inputs - .iter() - .map(|parsed_source| parsed_source.0.clone()) - .collect::>(), - ); - if strict_match_behavior.should_throw_on_error() { - return future::err(Self::mk_error(&msg)); - } else { - // TODO(#5683): this doesn't have any useful context (the stack trace) without - // being thrown -- this needs to be provided, otherwise this is unusable. - warn!("{}", msg); - } - } - } - - future::ok(match_results) - }) - .to_boxed() - } - - /// - /// Apply a PathGlob, returning PathStats and additional PathGlobs that are needed for the - /// expansion. - /// - fn expand_single( - &self, - sourced_glob: GlobWithSource, - exclude: &Arc, - ) -> BoxFuture { - match sourced_glob.path_glob.clone() { - PathGlob::Wildcard { canonical_dir, symbolic_path, wildcard } => - // Filter directory listing to return PathStats, with no continuation. - self.directory_listing(canonical_dir, symbolic_path, wildcard, exclude) - .map(move |path_stats| SingleExpansionResult { - sourced_glob, - path_stats, - globs: vec![], - }) - .to_boxed(), - PathGlob::DirWildcard { canonical_dir, symbolic_path, wildcard, remainder } => - // Filter directory listing and request additional PathGlobs for matched Dirs. - self.directory_listing(canonical_dir, symbolic_path, wildcard, exclude) - .and_then(move |path_stats| { - path_stats.into_iter() - .filter_map(|ps| match ps { - PathStat::Dir { path, stat } => - Some( - PathGlob::parse_globs(stat, path, &remainder) - .map_err(|e| Self::mk_error(e.as_str())) - ), - PathStat::File { .. } => None, - }) - .collect::, E>>() - }) - .map(move |path_globs| { - let flattened = path_globs - .into_iter() - .flat_map(|path_globs| path_globs.into_iter()) - .collect(); - SingleExpansionResult { - sourced_glob, - path_stats: vec![], - globs: flattened, - } - }) - .to_boxed(), - } - } } pub struct FileContent { diff --git a/src/rust/engine/src/nodes.rs b/src/rust/engine/src/nodes.rs index 722b5b9c49b..d172dcf5719 100644 --- a/src/rust/engine/src/nodes.rs +++ b/src/rust/engine/src/nodes.rs @@ -17,7 +17,7 @@ use boxfuture::{BoxFuture, Boxable}; use context::{Context, Core}; use core::{throw, Failure, Key, Noop, TypeConstraint, Value, Variants}; use externs; -use fs::{self, Dir, File, FileContent, Link, PathGlobs, PathStat, StoreFileByDigest, +use fs::{self, Dir, File, FileContent, GlobMatching, Link, PathGlobs, PathStat, StoreFileByDigest, StrictGlobMatching, VFS}; use hashing; use process_execution::{self, CommandRunner}; diff --git a/src/rust/engine/src/scheduler.rs b/src/rust/engine/src/scheduler.rs index 8d656f5d00c..ddb22041f06 100644 --- a/src/rust/engine/src/scheduler.rs +++ b/src/rust/engine/src/scheduler.rs @@ -12,7 +12,7 @@ use futures::sync::oneshot; use boxfuture::{BoxFuture, Boxable}; use context::{Context, ContextFactory, Core}; use core::{Failure, Key, TypeConstraint, TypeId, Value}; -use fs::{self, PosixFS, VFS}; +use fs::{self, GlobMatching, PosixFS}; use graph::EntryId; use nodes::{NodeKey, Select}; use rule_graph; From 09dc752ba129287e077a2a7db2a749015cebccc1 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 9 Jun 2018 17:09:04 -0700 Subject: [PATCH 3/6] add GlobMatching import to fs_util main.rs --- src/rust/engine/fs/fs_util/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/engine/fs/fs_util/src/main.rs b/src/rust/engine/fs/fs_util/src/main.rs index b93e97d5b16..307fff0ea21 100644 --- a/src/rust/engine/fs/fs_util/src/main.rs +++ b/src/rust/engine/fs/fs_util/src/main.rs @@ -10,7 +10,7 @@ extern crate protobuf; use bytes::Bytes; use clap::{App, Arg, SubCommand}; -use fs::{ResettablePool, Snapshot, Store, StoreFileByDigest, VFS}; +use fs::{GlobMatching, ResettablePool, Snapshot, Store, StoreFileByDigest, VFS}; use futures::future::Future; use hashing::{Digest, Fingerprint}; use protobuf::Message; From c4740b4efbdf592adb5f3a55645b386415251c44 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 11 Jun 2018 15:03:12 -0700 Subject: [PATCH 4/6] move pub GlobMatching trait to top of file --- src/rust/engine/fs/src/glob_matching.rs | 50 +++++++++++++------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/rust/engine/fs/src/glob_matching.rs b/src/rust/engine/fs/src/glob_matching.rs index 507ad239703..4c67543c4aa 100644 --- a/src/rust/engine/fs/src/glob_matching.rs +++ b/src/rust/engine/fs/src/glob_matching.rs @@ -14,6 +14,29 @@ use indexmap::{IndexMap, IndexSet, map::Entry::Occupied}; use {Dir, GitignoreStyleExcludes, GlobParsedSource, GlobSource, GlobWithSource, Link, PathGlob, PathGlobs, PathStat, Stat, VFS}; +pub trait GlobMatching: VFS { + /// + /// Canonicalize the Link for the given Path to an underlying File or Dir. May result + /// in None if the PathStat represents a broken Link. + /// + /// Skips ignored paths both before and after expansion. + /// + /// TODO: Should handle symlink loops (which would exhibit as an infinite loop in expand). + /// + fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { + GlobMatchingImplementation::canonicalize(self, symbolic_path, link) + } + + /// + /// Recursively expands PathGlobs into PathStats while applying excludes. + /// + fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { + GlobMatchingImplementation::expand(self, path_globs) + } +} + +impl> GlobMatching for T {} + #[derive(Clone, Debug, Eq, Hash, PartialEq)] enum GlobMatch { SuccessfullyMatchedSomeFiles, @@ -47,6 +70,10 @@ struct PathGlobsExpansion { outputs: IndexSet, } +// FIXME: This trait exists because `expand_single()` (and its return type) should be private, but +// traits don't allow specifying private methods (and we don't want to use a top-level `fn` because +// it's much more awkward than just specifying `&self`). +// The methods of `GlobMatching` are forwarded to methods here. trait GlobMatchingImplementation: VFS { fn directory_listing( &self, @@ -111,9 +138,6 @@ trait GlobMatchingImplementation: VFS { .to_boxed() } - /// - /// Recursively expands PathGlobs into PathStats while applying excludes. - /// fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { let PathGlobs { include, @@ -343,14 +367,6 @@ trait GlobMatchingImplementation: VFS { } } - /// - /// Canonicalize the Link for the given Path to an underlying File or Dir. May result - /// in None if the PathStat represents a broken Link. - /// - /// Skips ignored paths both before and after expansion. - /// - /// TODO: Should handle symlink loops (which would exhibit as an infinite loop in expand). - /// fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { // Read the link, which may result in PathGlob(s) that match 0 or 1 Path. let context = self.clone(); @@ -383,15 +399,3 @@ trait GlobMatchingImplementation: VFS { } impl> GlobMatchingImplementation for T {} - -pub trait GlobMatching: VFS { - fn canonicalize(&self, symbolic_path: PathBuf, link: Link) -> BoxFuture, E> { - GlobMatchingImplementation::canonicalize(self, symbolic_path, link) - } - - fn expand(&self, path_globs: PathGlobs) -> BoxFuture, E> { - GlobMatchingImplementation::expand(self, path_globs) - } -} - -impl> GlobMatching for T {} From 5c7829016d18c6679ed7470988c8d14cdc468dbf Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 11 Jun 2018 15:07:54 -0700 Subject: [PATCH 5/6] fix substantive ci error --- src/rust/engine/fs/src/snapshot.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/engine/fs/src/snapshot.rs b/src/rust/engine/fs/src/snapshot.rs index 58e25e1de86..e5b24aaf343 100644 --- a/src/rust/engine/fs/src/snapshot.rs +++ b/src/rust/engine/fs/src/snapshot.rs @@ -376,8 +376,8 @@ mod tests { use self::testutil::data::TestDirectory; use super::OneOffStoreFileByDigest; - use super::super::{Dir, File, Path, PathGlobs, PathStat, PosixFS, ResettablePool, Snapshot, - Store, StrictGlobMatching, VFS}; + use super::super::{Dir, File, GlobMatching, Path, PathGlobs, PathStat, PosixFS, ResettablePool, + Snapshot, Store, StrictGlobMatching, VFS}; use std; use std::path::PathBuf; From 24a2fb762b58e63e0501a18a1e47bf0db592444c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 11 Jun 2018 15:11:37 -0700 Subject: [PATCH 6/6] downgrade comment severity --- src/rust/engine/fs/src/glob_matching.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/engine/fs/src/glob_matching.rs b/src/rust/engine/fs/src/glob_matching.rs index 4c67543c4aa..6201c7e2e9f 100644 --- a/src/rust/engine/fs/src/glob_matching.rs +++ b/src/rust/engine/fs/src/glob_matching.rs @@ -70,7 +70,7 @@ struct PathGlobsExpansion { outputs: IndexSet, } -// FIXME: This trait exists because `expand_single()` (and its return type) should be private, but +// NB: This trait exists because `expand_single()` (and its return type) should be private, but // traits don't allow specifying private methods (and we don't want to use a top-level `fn` because // it's much more awkward than just specifying `&self`). // The methods of `GlobMatching` are forwarded to methods here.