Skip to content

Commit

Permalink
Support globs as cache keys
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 10, 2024
1 parent cfa9299 commit 9abc707
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 17 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-cache-info/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ workspace = true

[dependencies]
fs-err = { workspace = true }
glob = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
Expand Down
95 changes: 79 additions & 16 deletions crates/uv-cache-info/src/cache_info.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use crate::commit_info::CacheCommit;
use crate::timestamp::Timestamp;

use glob::MatchOptions;
use serde::Deserialize;
use std::cmp::max;
use std::io;
use std::path::{Path, PathBuf};
use tracing::debug;
use tracing::{debug, warn};

/// The information used to determine whether a built distribution is up-to-date, based on the
/// timestamps of relevant files, the current commit of a repository, etc.
Expand Down Expand Up @@ -64,24 +65,81 @@ impl CacheInfo {
// If no cache keys were defined, use the defaults.
let cache_keys = cache_keys.unwrap_or_else(|| {
vec![
CacheKey::Path(directory.join("pyproject.toml")),
CacheKey::Path(directory.join("setup.py")),
CacheKey::Path(directory.join("setup.cfg")),
CacheKey::Path("pyproject.toml".to_string()),
CacheKey::Path("setup.py".to_string()),
CacheKey::Path("setup.cfg".to_string()),
]
});

// Incorporate any additional timestamps or VCS information.
for cache_key in &cache_keys {
match cache_key {
CacheKey::Path(file) | CacheKey::File { file } => {
timestamp = max(
timestamp,
file.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
.as_ref()
.map(Timestamp::from_metadata),
);
if file.chars().any(|c| matches!(c, '*' | '?' | '[')) {
// Treat the path as a glob.
let path = directory.join(file);
let Some(pattern) = path.to_str() else {
warn!("Failed to convert pattern to string: {}", path.display());
continue;
};
let paths = match glob::glob_with(
pattern,
MatchOptions {
case_sensitive: false,
require_literal_separator: true,
require_literal_leading_dot: false,
},
) {
Ok(paths) => paths,
Err(err) => {
warn!("Failed to parse glob pattern: {err}");
continue;
}
};
for entry in paths {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("Failed to read glob entry: {err}");
continue;
}
};
let metadata = match entry.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for glob entry: {err}");
continue;
}
};
if metadata.is_file() {
timestamp =
max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
entry.display()
);
}
}
} else {
// Treat the path as a file.
let path = directory.join(file);
let metadata = match path.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for file: {err}");
continue;
}
};
if metadata.is_file() {
timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
path.display()
);
}
}
}
CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) {
Ok(commit_info) => commit = Some(commit_info),
Expand Down Expand Up @@ -165,10 +223,15 @@ struct ToolUv {
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
/// Ex) `"Cargo.lock"`
Path(PathBuf),
/// Ex) `{ file = "Cargo.lock" }`
File { file: PathBuf },
/// Ex) `"Cargo.lock"` or `"**/*.toml"`
Path(String),
/// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
File { file: String },
/// Ex) `{ git = true }`
Git { git: bool },
}

/// A file pattern, expressed either as a glob or as a concrete path.
///
/// NOTE(review): nothing in the visible code references this enum; confirm whether it is used
/// elsewhere in the crate or is leftover scaffolding from the glob cache-key change.
pub enum FilePattern {
/// A glob pattern held as a string (presumably in `glob` crate syntax, e.g. `**/*.toml`;
/// TODO confirm against the caller).
Glob(String),
/// A filesystem path.
Path(PathBuf),
}
7 changes: 6 additions & 1 deletion crates/uv-settings/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,18 @@ pub struct Options {
/// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in
/// addition to watching the `pyproject.toml`).
///
/// Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html)
/// crate. For example, to invalidate the cache whenever a `.toml` file in the project directory
/// or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`.
///
/// Cache keys can also include version control information. For example, if a project uses
/// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]`
/// to include the current Git commit hash in the cache key (in addition to the
/// `pyproject.toml`).
///
/// Cache keys only affect the project defined by the `pyproject.toml` in which they're
/// specified (as opposed to, e.g., affecting all members in a workspace).
/// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
/// globs are interpreted as relative to the project directory.
#[option(
default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#,
value_type = "list[dict]",
Expand Down
56 changes: 56 additions & 0 deletions crates/uv/tests/pip_install.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3265,6 +3265,62 @@ fn invalidate_path_on_cache_key() -> Result<()> {
"###
);

// Modify the `pyproject.toml` to use a glob.
pyproject_toml.write_str(
r#"[project]
name = "example"
version = "0.0.0"
dependencies = ["anyio==4.0.0"]
requires-python = ">=3.8"
[tool.uv]
cache-keys = [{ file = "**/*.txt" }]
"#,
)?;

// Write a new file.
editable_dir
.child("resources")
.child("data.txt")
.write_str("data")?;

// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);

// Write a new file in the current directory.
editable_dir.child("data.txt").write_str("data")?;

// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);

Ok(())
}

Expand Down
10 changes: 10 additions & 0 deletions docs/concepts/cache.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ the following to the project's `pyproject.toml`:
cache-keys = [{ file = "requirements.txt" }]
```

Globs are supported, following the syntax of the
[`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the
cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, use
the following:

```toml title="pyproject.toml"
[tool.uv]
cache-keys = [{ file = "**/*.toml" }]
```

As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`,
you can instruct uv to _always_ rebuild and reinstall it by adding the project to the
`tool.uv.reinstall-package` list:
Expand Down

0 comments on commit 9abc707

Please sign in to comment.