Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not invalidate parent paths when only file content has changed #13566

Merged
merged 2 commits into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,7 @@ def rule_graph_consumed_types(
self.py_scheduler, root_subject_types, product_type
)

def invalidate_files(self, direct_filenames: Iterable[str]) -> int:
filenames = set(direct_filenames)
# TODO(#11707): Evaluate removing the invalidation of parent directories.
filenames.update(os.path.dirname(f) for f in direct_filenames)
def invalidate_files(self, filenames: Iterable[str]) -> int:
return native_engine.graph_invalidate_paths(self.py_scheduler, tuple(filenames))

def invalidate_all_files(self) -> int:
Expand Down
121 changes: 68 additions & 53 deletions src/rust/engine/watch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ use std::time::Duration;
use crossbeam_channel::{self, Receiver, RecvTimeoutError, TryRecvError};
use fs::GitignoreStyleExcludes;
use log::{debug, trace, warn};
use notify::event::Flag;
use notify::{RecommendedWatcher, RecursiveMode, Watcher};
use notify::event::{Flag, ModifyKind};
use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use parking_lot::Mutex;
use task_executor::Executor;

Expand All @@ -63,7 +63,7 @@ type WatcherTaskInputs = (
Arc<GitignoreStyleExcludes>,
PathBuf,
crossbeam_channel::Sender<String>,
Receiver<notify::Result<notify::Event>>,
Receiver<notify::Result<Event>>,
);

pub struct InvalidationWatcher(Mutex<Inner>);
Expand Down Expand Up @@ -145,7 +145,7 @@ impl InvalidationWatcher {
ignorer: Arc<GitignoreStyleExcludes>,
canonical_build_root: PathBuf,
liveness_sender: crossbeam_channel::Sender<String>,
watch_receiver: Receiver<notify::Result<notify::Event>>,
watch_receiver: Receiver<notify::Result<Event>>,
) -> thread::JoinHandle<()> {
thread::spawn(move || {
let exit_msg = loop {
Expand All @@ -157,55 +157,7 @@ impl InvalidationWatcher {
break "The watcher was shut down.".to_string();
};
match event_res {
Ok(Ok(ev)) => {
let flag = ev.flag();
let paths: HashSet<_> = ev
.paths
.into_iter()
.filter_map(|path| {
// relativize paths to build root.
let path_relative_to_build_root = if path.starts_with(&canonical_build_root) {
// Unwrapping is fine because we check that the path starts with
// the build root above.
path.strip_prefix(&canonical_build_root).unwrap().into()
} else {
path
};
// To avoid having to stat paths for events we will eventually ignore we "lie" to the ignorer
// to say that no path is a directory, they could be if someone chmod's or creates a dir.
// This maintains correctness by ensuring that at worst we have false negative events, where a directory
// only glob (one that ends in `/` ) was supposed to ignore a directory path, but didn't because we claimed it was a file. That
// directory path will be used to invalidate nodes, but won't invalidate anything because its path is somewhere
// out of our purview.
if ignorer.is_ignored_or_child_of_ignored_path(
&path_relative_to_build_root,
/* is_dir */ false,
) {
trace!("notify ignoring {:?}", path_relative_to_build_root);
None
} else {
Some(path_relative_to_build_root)
}
})
.flat_map(|path_relative_to_build_root| {
let mut paths_to_invalidate: Vec<PathBuf> = vec![];
if let Some(parent_dir) = path_relative_to_build_root.parent() {
paths_to_invalidate.push(parent_dir.to_path_buf());
}
paths_to_invalidate.push(path_relative_to_build_root);
paths_to_invalidate
})
.collect();

// Only invalidate stuff if we have paths that weren't filtered out by gitignore.
if flag == Some(Flag::Rescan) {
debug!("notify queue overflowed: invalidating all paths");
invalidatable.invalidate_all("notify");
} else if !paths.is_empty() {
debug!("notify invalidating {:?} because of {:?}", paths, ev.kind);
invalidatable.invalidate(&paths, "notify");
}
}
Ok(Ok(ev)) => Self::handle_event(&*invalidatable, &ignorer, &canonical_build_root, ev),
Ok(Err(err)) => {
if let notify::ErrorKind::PathNotFound = err.kind {
warn!("Path(s) did not exist: {:?}", err.paths);
Expand All @@ -227,6 +179,69 @@ impl InvalidationWatcher {
})
}

fn handle_event<I: Invalidatable>(
invalidatable: &I,
ignorer: &GitignoreStyleExcludes,
canonical_build_root: &Path,
ev: Event,
) {
let is_data_only_event = matches!(ev.kind, EventKind::Modify(ModifyKind::Data(_)));
let flag = ev.flag();

let paths: HashSet<_> = ev
.paths
.into_iter()
.filter_map(|path| {
// relativize paths to build root.
let path_relative_to_build_root = if path.starts_with(canonical_build_root) {
// Unwrapping is fine because we check that the path starts with
// the build root above.
path.strip_prefix(canonical_build_root).unwrap().into()
} else {
path
};
// To avoid having to stat paths for events we will eventually ignore we "lie" to
// the ignorer to say that no path is a directory (although they could be if someone
// chmod's or creates a dir).
//
// This maintains correctness by ensuring that at worst we have false negative
// events, where a directory-only glob (one that ends in `/` ) was supposed to
// ignore a directory path, but didn't because we claimed it was a file. That
// directory path will be used to invalidate nodes, but won't invalidate anything
// because its path is somewhere out of our purview.
if ignorer.is_ignored_or_child_of_ignored_path(
&path_relative_to_build_root,
/* is_dir */ false,
) {
trace!("notify ignoring {:?}", path_relative_to_build_root);
None
} else {
Some(path_relative_to_build_root)
}
})
.flat_map(|path_relative_to_build_root| {
let mut paths_to_invalidate: Vec<PathBuf> = Vec::with_capacity(2);
if !is_data_only_event {
// If the event is anything other than a data change event (a change to the content of
// a file), then we additionally invalidate the parent of the path.
if let Some(parent_dir) = path_relative_to_build_root.parent() {
paths_to_invalidate.push(parent_dir.to_path_buf());
}
}
paths_to_invalidate.push(path_relative_to_build_root);
paths_to_invalidate
})
.collect();

if flag == Some(Flag::Rescan) {
debug!("notify queue overflowed: invalidating all paths");
invalidatable.invalidate_all("notify");
} else if !paths.is_empty() {
debug!("notify invalidating {:?} because of {:?}", paths, ev.kind);
invalidatable.invalidate(&paths, "notify");
}
}

///
/// An InvalidationWatcher will never restart on its own: a consumer should re-initialize if this
/// method returns an error.
Expand Down