Skip to content

Commit

Permalink
refactor: use http-cache for caching, optimize network calls (#304)
Browse files Browse the repository at this point in the history
* refactor: use http-cache for caching

* get it working

* use tokio MT

Not sure this matters, given that it's all essentially
serialized to the same thread anyways.

* optimize impostor-commit lookup pattern

* ref-confusion: optimize GH API use

* use an appropriate cache dir

* docs: document cache

* fix test

* perf: reuse the same branches API
  • Loading branch information
woodruffw authored Dec 16, 2024
1 parent 66c6fb6 commit 6a6e0ca
Show file tree
Hide file tree
Showing 10 changed files with 746 additions and 266 deletions.
699 changes: 552 additions & 147 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ clap = { version = "4.5.23", features = ["derive", "env"] }
clap-verbosity-flag = { version = "3.0.0", features = [
"tracing",
], default-features = false }
etcetera = "0.8.0"
github-actions-models = "0.14.0"
http-cache-reqwest = "0.15.0"
human-panic = "2.0.1"
indexmap = "2.7.0"
indicatif = "0.17.9"
itertools = "0.13.0"
moka = { version = "0.12.8", features = ["sync"] }
owo-colors = "4.1.0"
pest = "2.7.15"
pest_derive = "2.7.15"
Expand All @@ -36,11 +37,13 @@ reqwest = { version = "0.12.9", features = [
"json",
"rustls-tls",
], default-features = false }
reqwest-middleware = "0.4.0"
serde = { version = "1.0.215", features = ["derive"] }
serde-sarif = "0.6.5"
serde_json = "1.0.133"
serde_yaml = "0.9.34"
terminal-link = "0.1.0"
tokio = { version = "1.42.0", features = ["rt-multi-thread"] }
tracing = "0.1.41"
tracing-indicatif = "0.3.8"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
Expand Down
2 changes: 2 additions & 0 deletions docs/snippets/help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Options:
Filter all results below this severity [possible values: unknown, informational, low, medium, high]
--min-confidence <MIN_CONFIDENCE>
Filter all results below this confidence [possible values: unknown, low, medium, high]
--cache-dir <CACHE_DIR>
The directory to use for HTTP caching. By default, a host-appropriate user-caching directory will be used
-h, --help
Print help (see more with '--help')
-V, --version
Expand Down
27 changes: 27 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,33 @@ zizmor --config my-zizmor-config.yml /dir/to/audit
See [Configuration: `rules.<id>.ignore`](./configuration.md#rulesidignore) for
more details on writing ignore rules.

## Caching between runs

!!! tip

Persistent caching (between runs of `zizmor`) is available in `v0.10.0` and later.

!!! warning

Caches can contain sensitive metadata, especially when auditing private
repositories! Think twice before sharing your cache or reusing
it across machine/visibility boundaries.

`zizmor` caches HTTP responses from GitHub's REST APIs to speed up individual
audits. This HTTP cache is persisted and re-used between runs as well.

By default `zizmor` will cache to an appropriate user-level caching directory:

* Linux and macOS: `$XDG_CACHE_DIR` (`~/.cache/zizmor` by default)
* Windows: `~\AppData\Roaming\woodruffw\zizmor`.

To override the default caching directory, pass `--cache-dir`:

```bash
# cache in /tmp instead
zizmor --cache-dir /tmp/zizmor ...
```

## Integration

### Use in GitHub Actions
Expand Down
4 changes: 2 additions & 2 deletions src/audit/github_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ impl WorkflowAudit for GitHubEnv {
mod tests {
use crate::audit::github_env::{GitHubEnv, GITHUB_ENV_WRITE_CMD};
use crate::audit::WorkflowAudit;
use crate::state::{AuditState, Caches};
use crate::state::AuditState;

#[test]
fn test_exploitable_bash_patterns() {
Expand Down Expand Up @@ -173,8 +173,8 @@ mod tests {
] {
let audit_state = AuditState {
no_online_audits: false,
cache_dir: "/tmp/zizmor".into(),
gh_token: None,
caches: Caches::new(),
};

let sut = GitHubEnv::new(audit_state).expect("failed to create audit");
Expand Down
24 changes: 11 additions & 13 deletions src/audit/impostor_commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use github_actions_models::workflow::Job;
use super::{audit_meta, WorkflowAudit};
use crate::{
finding::{Confidence, Finding, Severity},
github_api::{self, Branch, ComparisonStatus, Tag},
github_api::{self, ComparisonStatus},
models::{RepositoryUses, Uses, Workflow},
state::AuditState,
};
Expand All @@ -29,12 +29,6 @@ audit_meta!(
);

impl ImpostorCommit {
fn named_refs(&self, uses: RepositoryUses<'_>) -> Result<(Vec<Branch>, Vec<Tag>)> {
let branches = self.client.list_branches(uses.owner, uses.repo)?;
let tags = self.client.list_tags(uses.owner, uses.repo)?;
Ok((branches, tags))
}

fn named_ref_contains_commit(
&self,
uses: &RepositoryUses<'_>,
Expand Down Expand Up @@ -67,18 +61,22 @@ impl ImpostorCommit {
return Ok(false);
};

let (branches, tags) = self.named_refs(uses)?;

// Fast path: almost all commit refs will be at the tip of
// the branch or tag's history, so check those first.
for branch in &branches {
if branch.commit.sha == head_ref {
// Check tags before branches, since in practice version tags
// are more commonly pinned.
let tags = self.client.list_tags(uses.owner, uses.repo)?;

for tag in &tags {
if tag.commit.sha == head_ref {
return Ok(false);
}
}

for tag in &tags {
if tag.commit.sha == head_ref {
let branches = self.client.list_branches(uses.owner, uses.repo)?;

for branch in &branches {
if branch.commit.sha == head_ref {
return Ok(false);
}
}
Expand Down
13 changes: 2 additions & 11 deletions src/audit/ref_confusion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,8 @@ impl RefConfusion {
return Ok(false);
};

let branches_match = self
.client
.list_branches(uses.owner, uses.repo)?
.iter()
.any(|b| b.name == sym_ref);

let tags_match = self
.client
.list_tags(uses.owner, uses.repo)?
.iter()
.any(|t| t.name == sym_ref);
let branches_match = self.client.has_branch(uses.owner, uses.repo, sym_ref)?;
let tags_match = self.client.has_tag(uses.owner, uses.repo, sym_ref)?;

// If both the branch and tag namespaces have a match, we have a
// confusable ref.
Expand Down
Loading

0 comments on commit 6a6e0ca

Please sign in to comment.