Skip to content

Commit

Permalink
local_working_copy: spawn snapshot job per directory
Browse files Browse the repository at this point in the history
This change basically means two things:
 a. a directory scan isn't split into too many small jobs, and
 b. a directory scan isn't blocked by recursive visit_directory() calls.
Before, small jobs were created at each recursion depth, so there were silent
time slice before these jobs started producing work.

I don't know if this mitigates the issue jj-vcs#4508, but it's slightly faster on my
Linux machine (except for the "git" repo which doesn't have many directories to
parallelize jobs.) The implementation is also simpler.

matcher.visit(dir) is moved to caller because it's silly to spawn an empty job.
TreeState::snapshot() already checks that for the root path.

This change will allow us to build a set of deleted files per directory, which
will reduce the overhead to send "present" files over the channel.

Benchmark:
- jj-0: original
- jj-1: this patch

gecko-dev (large):
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1,jj-2 \
  'target/release-with-debug/{bin} -R ~/mirrors/gecko-dev debug snapshot'
Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/gecko-dev debug snapshot
  Time (mean ± σ):     707.4 ms ±  12.3 ms    [User: 2880.2 ms, System: 1939.1 ms]
  Range (min … max):   684.4 ms … 721.7 ms    10 runs

Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/gecko-dev debug snapshot
  Time (mean ± σ):     659.6 ms ±  11.8 ms    [User: 2673.2 ms, System: 1882.1 ms]
  Range (min … max):   649.2 ms … 690.1 ms    10 runs

Relative speed comparison
        1.61 ±  0.05  target/release-with-debug/jj-0 -R ~/mirrors/gecko-dev debug snapshot
        1.50 ±  0.05  target/release-with-debug/jj-1 -R ~/mirrors/gecko-dev debug snapshot
```

linux (mid-size):
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1,jj-2 \
  'target/release-with-debug/{bin} -R ~/mirrors/linux debug snapshot'
Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux debug snapshot
  Time (mean ± σ):     256.3 ms ±   6.2 ms    [User: 573.0 ms, System: 459.2 ms]
  Range (min … max):   248.5 ms … 266.0 ms    10 runs

Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux debug snapshot
  Time (mean ± σ):     225.5 ms ±   4.2 ms    [User: 563.3 ms, System: 475.8 ms]
  Range (min … max):   218.7 ms … 231.4 ms    10 runs

Relative speed comparison
        1.35 ±  0.04  target/release-with-debug/jj-0 -R ~/mirrors/linux debug snapshot
        1.19 ±  0.03  target/release-with-debug/jj-1 -R ~/mirrors/linux debug snapshot
```

git (small, flat):
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 100 -L bin jj-0,jj-1,jj-2 \
  'target/release-with-debug/{bin} -R ~/mirrors/git debug snapshot'
Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/git debug snapshot
  Time (mean ± σ):      30.6 ms ±   1.6 ms    [User: 38.8 ms, System: 40.6 ms]
  Range (min … max):    28.1 ms …  43.8 ms    100 runs

Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/git debug snapshot
  Time (mean ± σ):      34.0 ms ±   1.2 ms    [User: 33.5 ms, System: 47.5 ms]
  Range (min … max):    31.5 ms …  39.2 ms    100 runs

Relative speed comparison
        1.00          target/release-with-debug/jj-0 -R ~/mirrors/git debug snapshot
        1.11 ±  0.07  target/release-with-debug/jj-1 -R ~/mirrors/git debug snapshot
```

jj (small, structured):
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 100 -L bin jj-0,jj-1,jj-2 \
  'target/release-with-debug/{bin} -R . debug snapshot'
Benchmark 1: target/release-with-debug/jj-0 -R . debug snapshot
  Time (mean ± σ):      30.9 ms ±   1.6 ms    [User: 16.3 ms, System: 28.0 ms]
  Range (min … max):    28.9 ms …  41.8 ms    100 runs

Benchmark 2: target/release-with-debug/jj-1 -R . debug snapshot
  Time (mean ± σ):      31.2 ms ±   1.9 ms    [User: 15.3 ms, System: 27.0 ms]
  Range (min … max):    29.0 ms …  43.3 ms    100 runs

Relative speed comparison
        1.00          target/release-with-debug/jj-0 -R . debug snapshot
        1.01 ±  0.08  target/release-with-debug/jj-1 -R . debug snapshot
```

- CPU: 8-core AMD Ryzen 7 PRO 4750U with Radeon Graphics (-MT MCP-)
- speed/min/max: 1600/1400/1700 MHz Kernel: 6.11.10-amd64 x86_64
- Filesystem: ext4
  • Loading branch information
yuja committed Dec 5, 2024
1 parent 9b01af9 commit ad9a0ef
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 9 deletions.
46 changes: 37 additions & 9 deletions lib/src/local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use std::slice;
use std::sync::mpsc::channel;
use std::sync::mpsc::Sender;
use std::sync::Arc;
use std::sync::OnceLock;
use std::time::UNIX_EPOCH;

use futures::StreamExt;
Expand Down Expand Up @@ -897,6 +898,7 @@ impl TreeState {
tree_entries_tx,
file_states_tx,
present_files_tx,
error: OnceLock::new(),
progress,
max_new_file_size,
conflict_marker_style,
Expand All @@ -907,7 +909,8 @@ impl TreeState {
git_ignore: base_ignores.clone(),
file_states: self.file_states.all(),
};
snapshotter.visit_directory(directory_to_visit)
rayon::scope(|scope| snapshotter.visit_directory(directory_to_visit, scope))?;
snapshotter.into_result()
})?;

let mut tree_builder = MergedTreeBuilder::new(self.tree_id.clone());
Expand Down Expand Up @@ -1030,24 +1033,47 @@ struct FileSnapshotter<'a> {
tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
file_states_tx: Sender<(RepoPathBuf, FileState)>,
present_files_tx: Sender<RepoPathBuf>,
error: OnceLock<SnapshotError>,
progress: Option<&'a SnapshotProgress<'a>>,
max_new_file_size: u64,
conflict_marker_style: ConflictMarkerStyle,
}

impl FileSnapshotter<'_> {
fn visit_directory(&self, directory_to_visit: DirectoryToVisit) -> Result<(), SnapshotError> {
fn spawn_ok<'scope, F>(&'scope self, scope: &rayon::Scope<'scope>, body: F)
where
F: FnOnce(&rayon::Scope<'scope>) -> Result<(), SnapshotError> + Send + 'scope,
{
scope.spawn(|scope| {
if self.error.get().is_some() {
return;
}
match body(scope) {
Ok(()) => {}
Err(err) => self.error.set(err).unwrap_or(()),
};
});
}

/// Extracts the result of the snapshot.
fn into_result(self) -> Result<(), SnapshotError> {
self.error.into_inner().map_or(Ok(()), Err)
}

/// Visits the directory entries sequentially, spawns jobs to recurse into
/// sub directories.
fn visit_directory<'scope>(
&'scope self,
directory_to_visit: DirectoryToVisit<'scope>,
scope: &rayon::Scope<'scope>,
) -> Result<(), SnapshotError> {
let DirectoryToVisit {
dir,
disk_dir,
git_ignore,
file_states,
} = directory_to_visit;

if self.matcher.visit(&dir).is_nothing() {
return Ok(());
}

let git_ignore = git_ignore
.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
let to_read_dir_err = |err: io::Error| SnapshotError::Other {
Expand Down Expand Up @@ -1085,15 +1111,17 @@ impl FileSnapshotter<'_> {

// If the whole directory is ignored, visit only paths we're already
// tracking.
self.visit_tracked_files(file_states)?;
} else {
self.spawn_ok(scope, move |_| self.visit_tracked_files(file_states));
} else if !self.matcher.visit(&path).is_nothing() {
let directory_to_visit = DirectoryToVisit {
dir: path,
disk_dir: entry.path(),
git_ignore: git_ignore.clone(),
file_states,
};
self.visit_directory(directory_to_visit)?;
self.spawn_ok(scope, |scope| {
self.visit_directory(directory_to_visit, scope)
});
}
} else if self.matcher.matches(&path) {
if let Some(progress) = self.progress {
Expand Down
17 changes: 17 additions & 0 deletions lib/tests/test_local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2111,4 +2111,21 @@ fn test_snapshot_max_new_file_size() {
matches!(err, SnapshotError::NewFileTooLarge { .. }),
"the failure should be attributed to new file size"
);

// A file in sub directory should also be caught
let sub_large_path = RepoPath::from_internal_string("sub/large");
std::fs::create_dir(
sub_large_path
.parent()
.unwrap()
.to_fs_path_unchecked(&workspace_root),
)
.unwrap();
std::fs::rename(
large_path.to_fs_path_unchecked(&workspace_root),
sub_large_path.to_fs_path_unchecked(&workspace_root),
)
.unwrap();
let result = test_workspace.snapshot_with_options(&options);
assert_matches!(result, Err(SnapshotError::NewFileTooLarge { .. }));
}

0 comments on commit ad9a0ef

Please sign in to comment.