Skip to content

Commit

Permalink
feat: implement inputs and outputs hash based task skipping (#933)
Browse files Browse the repository at this point in the history
Co-authored-by: Bas Zalmstra <bas@prefix.dev>
  • Loading branch information
wolfv and baszalmstra authored Mar 15, 2024
1 parent 7a535bd commit 9fb8cae
Show file tree
Hide file tree
Showing 14 changed files with 705 additions and 44 deletions.
123 changes: 87 additions & 36 deletions Cargo.lock

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ assert_matches = "1.5.0"
async-once-cell = "0.5.3"
async-recursion = "1.0.5"
async-scoped = { version = "0.9.0", features = ["use-tokio"] }
cfg-if = "1.0"
chrono = "0.4.35"
clap = { version = "4.5.2", default-features = false, features = [
blake3 = "1.5.0"
cfg-if = "0.1"
chrono = "0.4.34"
clap = { version = "4.5.1", default-features = false, features = [
"derive",
"usage",
"wrap_help",
Expand All @@ -43,6 +44,7 @@ clap = { version = "4.5.2", default-features = false, features = [
clap-verbosity-flag = "2.2.0"
clap_complete = "4.5.1"
console = { version = "0.15.8", features = ["windows-console-colors"] }
crossbeam-channel = "0.5.12"
deno_task_shell = "0.14.4"
dialoguer = "0.11.0"
dirs = "5.0.1"
Expand All @@ -51,9 +53,11 @@ distribution-types = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
dunce = "1.0.4"
flate2 = "1.0.28"
futures = "0.3.30"
globset = "0.4.14"
http-cache-reqwest = "0.13.0"
human_bytes = "0.4.3"
humantime = "2.1.0"
ignore = "0.4.22"
indexmap = { version = "2.2.5", features = ["serde"] }
indicatif = "0.17.8"

Expand Down Expand Up @@ -128,6 +132,7 @@ uv-interpreter = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
uv-normalize = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
uv-resolver = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
uv-traits = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
xxhash-rust = "0.8.10"
zip = { version = "0.6.6", default-features = false, features = [
"deflate",
"time",
Expand Down
35 changes: 35 additions & 0 deletions docs/advanced/advanced_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,41 @@ This will add the following line to `pixi.toml`:
bar = { cmd = "python bar.py", cwd = "scripts" }
```

## Caching

When you specify `inputs` and/or `outputs` for a task, pixi can skip re-running the task and reuse the result of a previous run if nothing relevant has changed.

Before skipping a task, pixi checks that all of the following are true:

- No package in the environment has changed.
- The selected inputs and outputs are the same as the last time the task was
run. We compute fingerprints of all the files selected by the globs and
compare them to the last time the task was run.
- The command is the same as the last time the task was run.

If all of these conditions are met, pixi will not run the task again and instead use the existing result.

Inputs and outputs can be specified as globs, which will be expanded to all matching files.

```toml title="pixi.toml"
[tasks]
# This task will only run if the `main.py` file has changed.
run = { cmd = "python main.py", inputs = ["main.py"] }

# This task will remember the result of the `curl` command and not run it again if the file `data.csv` already exists.
download_data = { cmd = "curl -o data.csv https://example.com/data.csv", outputs = ["data.csv"] }

# This task will only run if the `src` directory has changed and will remember the result of the `make` command.
build = { cmd = "make", inputs = ["src/*.cpp", "include/*.hpp"], outputs = ["build/app.exe"] }
```

Note: if you want to debug the globs you can use the `--verbose` flag to see which files are selected.

```shell
# shows info logs of all files that were selected by the globs
pixi run -v start
```

## Our task runner: deno_task_shell

To support the different OS's (Windows, OSX and Linux), pixi integrates a shell that can run on all of them.
Expand Down
13 changes: 11 additions & 2 deletions examples/cpp-sdl/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,19 @@ configure = { cmd = [
# We want to build in the .build directory
"-B",
".build",
] }
], inputs = ["CMakeLists.txt"], outputs = [".build/CMakeFiles/"] }

# Build the executable but make sure CMake is configured first.
build = { cmd = ["ninja", "-C", ".build"], depends_on = ["configure"] }
[feature.build.tasks.build]
cmd = ["ninja", "-C", ".build"]
depends_on = ["configure"]
inputs = [
"CMakeLists.txt",
"src/*"
]
outputs = [
".build/bin/sdl_example"
]

[environments]
build = ["build"]
3 changes: 2 additions & 1 deletion schema/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ class TaskInlineTable(StrictBaseModel):
depends_on: list[NonEmptyStr] | NonEmptyStr | None = Field(
None, description="The tasks that this task depends on"
)

inputs: list[Glob] | None = Field(None, description="A list of glob patterns that should be watched for changes before this command is run")
outputs: list[Glob] | None = Field(None, description="A list of glob patterns that are generated by this command")

#######################
# System requirements #
Expand Down
34 changes: 34 additions & 0 deletions schema/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,40 @@
"default": null,
"description": "The tasks that this task depends on",
"title": "Depends On"
},
"inputs": {
"anyOf": [
{
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"description": "A list of glob patterns that should be watched for changes before this command is run",
"title": "Inputs"
},
"outputs": {
"anyOf": [
{
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"description": "A list of glob patterns that are generated by this command",
"title": "Outputs"
}
},
"title": "TaskInlineTable",
Expand Down
22 changes: 21 additions & 1 deletion src/cli/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::activation::get_environment_variables;
use crate::environment::verify_prefix_location_unchanged;
use crate::project::errors::UnsupportedPlatformError;
use crate::task::{
AmbiguousTask, ExecutableTask, FailedToParseShellScript, InvalidWorkingDirectory,
AmbiguousTask, CanSkip, ExecutableTask, FailedToParseShellScript, InvalidWorkingDirectory,
SearchEnvironments, TaskAndEnvironment, TaskGraph, TaskName,
};
use crate::Project;
Expand Down Expand Up @@ -142,6 +142,20 @@ pub async fn execute(args: Args) -> miette::Result<()> {
);
}

// check task cache
let task_cache = match executable_task
.can_skip(&lock_file)
.await
.into_diagnostic()?
{
CanSkip::No(cache) => cache,
CanSkip::Yes => {
eprintln!("Task can be skipped (cache hit) 🚀");
task_idx += 1;
continue;
}
};

// If we don't have a command environment yet, we need to compute it. We lazily compute the
// task environment because we only need the environment if a task is actually executed.
let task_env: &_ = match task_envs.entry(executable_task.run_environment.clone()) {
Expand All @@ -167,6 +181,12 @@ pub async fn execute(args: Args) -> miette::Result<()> {
}
Err(err) => return Err(err.into()),
}

// Update the task cache with the new hash
executable_task
.save_cache(&lock_file, task_cache)
.await
.into_diagnostic()?;
}

Ok(())
Expand Down
2 changes: 2 additions & 0 deletions src/cli/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ impl From<AddArgs> for Task {
Self::Execute(Execute {
cmd: CmdArgs::Single(cmd_args),
depends_on,
inputs: None,
outputs: None,
cwd: value.cwd,
})
}
Expand Down
2 changes: 2 additions & 0 deletions src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ pub const PREFIX_FILE_NAME: &str = "pixi_env_prefix";
pub const ENVIRONMENTS_DIR: &str = "envs";
pub const SOLVE_GROUP_ENVIRONMENTS_DIR: &str = "solve-group-envs";
pub const PYPI_DEPENDENCIES: &str = "pypi-dependencies";
pub const TASK_CACHE_DIR: &str = "task-cache-v0";

pub const DEFAULT_ENVIRONMENT_NAME: &str = "default";

/// The default channels to use for a new project.
Expand Down
4 changes: 4 additions & 0 deletions src/project/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,10 @@ impl Project {
})
.await
}

pub(crate) fn task_cache_folder(&self) -> PathBuf {
self.pixi_dir().join(consts::TASK_CACHE_DIR)
}
}

/// Iterates over the current directory and all its parent directories and returns the first
Expand Down
83 changes: 83 additions & 0 deletions src/task/executable_task.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::consts::TASK_STYLE;
use crate::lock_file::LockFileDerivedData;
use crate::project::Environment;
use crate::task::TaskName;
use crate::{
Expand All @@ -20,6 +21,8 @@ use std::{
use thiserror::Error;
use tokio::task::JoinHandle;

use super::task_hash::{InputHashesError, TaskCache, TaskHash};

/// Runs task in project.
#[derive(Default, Debug)]
pub struct RunOutput {
Expand All @@ -45,10 +48,28 @@ pub struct InvalidWorkingDirectory {
pub enum TaskExecutionError {
#[error(transparent)]
InvalidWorkingDirectory(#[from] InvalidWorkingDirectory),

#[error(transparent)]
FailedToParseShellScript(#[from] FailedToParseShellScript),
}

#[derive(Debug, Error, Diagnostic)]
pub enum CacheUpdateError {
#[error(transparent)]
Io(#[from] std::io::Error),

#[error(transparent)]
TaskHashError(#[from] InputHashesError),

#[error("failed to serialize cache")]
Serialization(#[from] serde_json::Error),
}

pub enum CanSkip {
Yes,
No(Option<TaskHash>),
}

/// A task that contains enough information to be able to execute it. The lifetime [`'p`] refers to
/// the lifetime of the project that contains the tasks.
#[derive(Clone)]
Expand Down Expand Up @@ -181,6 +202,68 @@ impl<'p> ExecutableTask<'p> {
stderr: stderr_handle.await.unwrap(),
})
}

/// We store the hashes of the inputs and the outputs of the task in a file in the cache.
/// The current name is something like `run_environment-task_name.json`.
pub(crate) fn cache_name(&self) -> String {
format!(
"{}-{}.json",
self.run_environment.name(),
self.name().unwrap_or("default")
)
}

/// Checks if the task can be skipped. If the task can be skipped, it returns `CanSkip::Yes`.
/// If the task cannot be skipped, it returns `CanSkip::No` and includes the hash of the task
/// that caused the task to not be skipped - we can use this later to update the cache file quickly.
pub(crate) async fn can_skip(
&self,
lock_file: &LockFileDerivedData<'_>,
) -> Result<CanSkip, std::io::Error> {
tracing::info!("Checking if task can be skipped");
let cache_name = self.cache_name();
let cache_file = self.project().task_cache_folder().join(cache_name);
if cache_file.exists() {
let cache = tokio::fs::read_to_string(&cache_file).await?;
let cache: TaskCache = serde_json::from_str(&cache)?;
let hash = TaskHash::from_task(self, &lock_file.lock_file).await;
if let Ok(Some(hash)) = hash {
if hash.computation_hash() != cache.hash {
return Ok(CanSkip::No(Some(hash)));
} else {
return Ok(CanSkip::Yes);
}
}
}
Ok(CanSkip::No(None))
}

/// Saves the cache of the task. This function will update the cache file with the new hash of
/// the task (inputs and outputs). If the task has no hash, it will not save the cache.
pub(crate) async fn save_cache(
&self,
lock_file: &LockFileDerivedData<'_>,
previous_hash: Option<TaskHash>,
) -> Result<(), CacheUpdateError> {
let task_cache_folder = self.project().task_cache_folder();
let cache_file = task_cache_folder.join(self.cache_name());
let new_hash = if let Some(mut previous_hash) = previous_hash {
previous_hash.update_output(self).await?;
previous_hash
} else if let Some(hash) = TaskHash::from_task(self, &lock_file.lock_file).await? {
hash
} else {
return Ok(());
};

tokio::fs::create_dir_all(&task_cache_folder).await?;

let cache = TaskCache {
hash: new_hash.computation_hash(),
};
let cache = serde_json::to_string(&cache)?;
Ok(tokio::fs::write(&cache_file, cache).await?)
}
}

/// A helper object that implements [`Display`] to display (with ascii color) the command of the
Expand Down
Loading

0 comments on commit 9fb8cae

Please sign in to comment.