feat: cache source code too (#1226)
wolfv authored Dec 4, 2024
1 parent 13f9a14 commit 5f0a3e5
Showing 16 changed files with 257 additions and 170 deletions.
14 changes: 8 additions & 6 deletions src/build.rs
@@ -114,12 +114,14 @@ pub async fn run_build(

     let directories = output.build_configuration.directories.clone();
 
-    let output = output
-        .fetch_sources(tool_configuration)
-        .await
-        .into_diagnostic()?;
-
-    let output = output.build_or_fetch_cache(tool_configuration).await?;
+    let output = if output.recipe.cache.is_some() {
+        output.build_or_fetch_cache(tool_configuration).await?
+    } else {
+        output
+            .fetch_sources(tool_configuration)
+            .await
+            .into_diagnostic()?
+    };
 
     let output = output
         .resolve_dependencies(tool_configuration)
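The new dispatch makes `run_build` fetch sources in exactly one place: recipes with a top-level `cache` section hand source fetching to `build_or_fetch_cache` (which may restore everything from cache instead), while recipes without one fetch sources directly, as before. A minimal sketch of the control flow, using a hypothetical, simplified `Output` stub in place of rattler-build's real (async) type:

```rust
// Hypothetical stand-in for rattler-build's `Output` pipeline; the real
// methods are async and return miette errors.
struct Output {
    has_cache_section: bool,
}

impl Output {
    fn fetch_sources(self) -> Result<Self, String> {
        println!("fetching sources for the package itself");
        Ok(self)
    }

    fn build_or_fetch_cache(self) -> Result<Self, String> {
        println!("restoring the cache (or building it, fetching cache sources)");
        Ok(self)
    }
}

fn run_build(output: Output) -> Result<Output, String> {
    // Mirrors src/build.rs: exactly one branch is responsible for fetching.
    if output.has_cache_section {
        output.build_or_fetch_cache()
    } else {
        output.fetch_sources()
    }
}

fn main() -> Result<(), String> {
    run_build(Output { has_cache_section: true })?;
    run_build(Output { has_cache_section: false })?;
    Ok(())
}
```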
235 changes: 117 additions & 118 deletions src/cache.rs
@@ -1,25 +1,30 @@
 //! Functions to deal with the build cache
 use std::{
     collections::{BTreeMap, HashSet},
-    path::{Path, PathBuf},
+    path::PathBuf,
 };
 
 use fs_err as fs;
-use memchr::memmem;
-use memmap2::Mmap;
 use miette::{Context, IntoDiagnostic};
+use minijinja::Value;
 use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
 
 use crate::{
     env_vars,
     metadata::{build_reindexed_channels, Output},
-    packaging::{contains_prefix_binary, contains_prefix_text, content_type, Files},
-    recipe::parser::{Dependency, Requirements},
+    packaging::Files,
+    recipe::{
+        parser::{Dependency, Requirements, Source},
+        Jinja,
+    },
     render::resolved_dependencies::{
         install_environments, resolve_dependencies, FinalizedDependencies,
     },
-    source::copy_dir::{copy_file, create_symlink, CopyOptions},
+    source::{
+        copy_dir::{copy_file, CopyDir, CopyOptions},
+        fetch_sources,
+    },
 };
 
 /// Error type for cache key generation
@@ -39,10 +44,19 @@ pub enum CacheKeyError {
 pub struct Cache {
     /// The requirements that were used to build the cache
     pub requirements: Requirements,
+
     /// The finalized dependencies
     pub finalized_dependencies: FinalizedDependencies,
+
+    /// The finalized sources
+    pub finalized_sources: Vec<Source>,
+
     /// The prefix files that are included in the cache
-    pub prefix_files: Vec<(PathBuf, bool)>,
+    pub prefix_files: Vec<PathBuf>,
+
+    /// The (dirty) source files that are included in the cache
+    pub work_dir_files: Vec<PathBuf>,
+
     /// The prefix that was used at build time (needs to be replaced when
     /// restoring the files)
     pub prefix: PathBuf,
@@ -88,74 +102,62 @@ impl Output {
             self.build_configuration.build_platform.platform.to_string(),
         );
 
-        let cache_key = (cache, selected_variant);
+        let cache_key = (cache, selected_variant, self.prefix());
         // serialize to json and hash
         let mut hasher = Sha256::new();
-        let serialized = serde_json::to_string(&cache_key)?;
-        hasher.update(serialized.as_bytes());
-        let result = hasher.finalize();
-        Ok(format!("{:x}", result))
+        cache_key.serialize(&mut serde_json::Serializer::new(&mut hasher))?;
+        Ok(format!("{:x}", hasher.finalize()))
     } else {
         Err(CacheKeyError::NoCacheKeyAvailable)
     }
 }
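Two changes here: the build prefix becomes part of the cache key, and the key is hashed by serializing directly into the `Sha256` hasher (which implements `std::io::Write` with sha2's `std` feature) instead of going through an intermediate JSON `String`. Keying on the prefix means a cache is only ever restored into the exact prefix it was built for, which is what allows the prefix-replacement machinery at the bottom of this file to be deleted. A standalone sketch of the streaming-hash pattern; `hash_json` is a hypothetical helper, not rattler-build API:

```rust
use serde::Serialize;
use sha2::{Digest, Sha256};

/// Hash any serializable value by streaming its JSON encoding straight
/// into the hasher; no intermediate String is allocated.
fn hash_json<T: Serialize>(value: &T) -> Result<String, serde_json::Error> {
    let mut hasher = Sha256::new();
    value.serialize(&mut serde_json::Serializer::new(&mut hasher))?;
    Ok(format!("{:x}", hasher.finalize()))
}

fn main() -> Result<(), serde_json::Error> {
    // Stand-in for the (cache, selected_variant, prefix) tuple in cache_key().
    let key = ("cache", [("target_platform", "linux-64")], "/opt/conda-bld/h_env");
    println!("bld_{}", hash_json(&key)?);
    Ok(())
}
```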

     /// Restore an existing cache from a cache directory
-    async fn restore_cache(&self, cache_dir: PathBuf) -> Result<Output, miette::Error> {
-        let cache: Cache = serde_json::from_str(
-            &fs::read_to_string(cache_dir.join("cache.json")).into_diagnostic()?,
-        )
-        .into_diagnostic()?;
+    async fn restore_cache(
+        &self,
+        cache: Cache,
+        cache_dir: PathBuf,
+    ) -> Result<Output, miette::Error> {
+        let cache_prefix_dir = cache_dir.join("prefix");
+        let copied_prefix = CopyDir::new(&cache_prefix_dir, self.prefix())
+            .run()
+            .into_diagnostic()?;
+
+        // restore the work dir files
+        let cache_dir_work = cache_dir.join("work_dir");
+        let copied_cache = CopyDir::new(
+            &cache_dir_work,
+            &self.build_configuration.directories.work_dir,
+        )
+        .run()
+        .into_diagnostic()?;
-        let copy_options = CopyOptions {
-            skip_exist: true,
-            ..Default::default()
-        };
-        let cache_prefix = cache.prefix;
-
-        let mut paths_created = HashSet::new();
-        for (file, has_prefix) in &cache.prefix_files {
-            tracing::info!("Restoring from cache: {:?}", file);
-            let dest = self.prefix().join(file);
-            let source = &cache_dir.join("prefix").join(file);
-            copy_file(source, &dest, &mut paths_created, &copy_options).into_diagnostic()?;
-
-            // check if the symlink starts with the old prefix, and if yes, make the symlink
-            // absolute with the new prefix
-            if source.is_symlink() {
-                let symlink_target = fs::read_link(source).into_diagnostic()?;
-                if let Ok(rest) = symlink_target.strip_prefix(&cache_prefix) {
-                    let new_symlink_target = self.prefix().join(rest);
-                    fs::remove_file(&dest).into_diagnostic()?;
-                    create_symlink(&new_symlink_target, &dest).into_diagnostic()?;
-                }
-            }
-
-            if *has_prefix {
-                replace_prefix(&dest, &cache_prefix, self.prefix())?;
-            }
-        }
+        let combined_files = copied_prefix.copied_paths().len() + copied_cache.copied_paths().len();
+        tracing::info!(
+            "Restored {} source and prefix files from cache",
+            combined_files
+        );
 
         Ok(Output {
             finalized_cache_dependencies: Some(cache.finalized_dependencies.clone()),
+            finalized_cache_sources: Some(cache.finalized_sources.clone()),
             ..self.clone()
         })
     }

     /// This will fetch sources and build the cache if it doesn't exist
     /// Note: this modifies the output in place
     pub(crate) async fn build_or_fetch_cache(
-        &self,
+        mut self,
         tool_configuration: &crate::tool_configuration::Configuration,
     ) -> Result<Self, miette::Error> {
-        // if we don't have a cache, we need to run the cache build with our current
-        // workdir, and then return the cache
-        let span = tracing::info_span!("Running cache build");
-        let _enter = span.enter();
-
-        let target_platform = self.build_configuration.target_platform;
-        let mut env_vars = env_vars::vars(self, "BUILD");
-        env_vars.extend(env_vars::os_vars(self.prefix(), &target_platform));
-
-        if let Some(cache) = &self.recipe.cache {
-            tracing::info!("Cache key: {:?}", self.cache_key().into_diagnostic()?);
+        if let Some(cache) = self.recipe.cache.clone() {
+            // if we don't have a cache, we need to run the cache build with our current
+            // workdir, and then return the cache
+            let span = tracing::info_span!("Running cache build");
+            let _enter = span.enter();
+
+            tracing::info!("using cache key: {:?}", self.cache_key().into_diagnostic()?);
             let cache_key = format!("bld_{}", self.cache_key().into_diagnostic()?);
 
             let cache_dir = self
@@ -166,24 +168,66 @@ impl Output {

             // restore the cache if it exists by copying the files to the prefix
             if cache_dir.exists() {
-                tracing::info!("Restoring cache from {:?}", cache_dir);
-                return self.restore_cache(cache_dir).await;
+                let text = fs::read_to_string(cache_dir.join("cache.json")).into_diagnostic()?;
+                match serde_json::from_str::<Cache>(&text) {
+                    Ok(cache) => {
+                        tracing::info!("Restoring cache from {:?}", cache_dir);
+                        self = self
+                            .fetch_sources(tool_configuration)
+                            .await
+                            .into_diagnostic()?;
+                        return self.restore_cache(cache, cache_dir).await;
+                    }
+                    Err(e) => {
+                        tracing::error!(
+                            "Failed to parse cache at {}: {:?} - rebuilding",
+                            cache_dir.join("cache.json").display(),
+                            e
+                        );
+                        // remove the cache dir and run as normal
+                        fs::remove_dir_all(&cache_dir).into_diagnostic()?;
+                    }
+                }
             }
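Cache restoration is now tolerant of stale metadata: `cache.json` is parsed before committing to a restore, and a file that no longer matches the current `Cache` layout (for example, one written by an older rattler-build) is logged, wiped, and treated as a cache miss instead of failing the build. A standalone sketch of that advisory-cache pattern, with a hypothetical `load_cache` helper (assumes the `serde` and `serde_json` crates):

```rust
use std::{fs, path::Path};

/// Treat an on-disk cache as advisory: a cache.json that fails to
/// deserialize is discarded and reported as a miss instead of an error.
fn load_cache<C: serde::de::DeserializeOwned>(dir: &Path) -> Option<C> {
    let text = fs::read_to_string(dir.join("cache.json")).ok()?;
    match serde_json::from_str(&text) {
        Ok(cache) => Some(cache),
        Err(e) => {
            eprintln!("discarding unreadable cache ({e}); rebuilding");
            // stale or corrupt layout: wipe it and fall through to a rebuild
            let _ = fs::remove_dir_all(dir);
            None
        }
    }
}

fn main() {
    let cache: Option<serde_json::Value> = load_cache(Path::new("/tmp/bld_missing"));
    assert!(cache.is_none());
}
```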

+            // fetch the sources for the `cache` section
+            let rendered_sources = fetch_sources(
+                self.finalized_cache_sources
+                    .as_ref()
+                    .unwrap_or(&cache.source),
+                &self.build_configuration.directories,
+                &self.system_tools,
+                tool_configuration,
+            )
+            .await
+            .into_diagnostic()?;
+
+            let target_platform = self.build_configuration.target_platform;
+            let mut env_vars = env_vars::vars(&self, "BUILD");
+            env_vars.extend(env_vars::os_vars(self.prefix(), &target_platform));
+
             // Reindex the channels
             let channels = build_reindexed_channels(&self.build_configuration, tool_configuration)
                 .into_diagnostic()
                 .context("failed to reindex output channel")?;
 
             let finalized_dependencies =
-                resolve_dependencies(&cache.requirements, self, &channels, tool_configuration)
+                resolve_dependencies(&cache.requirements, &self, &channels, tool_configuration)
                     .await
                     .unwrap();
 
-            install_environments(self, &finalized_dependencies, tool_configuration)
+            install_environments(&self, &finalized_dependencies, tool_configuration)
                 .await
                 .into_diagnostic()?;
 
+            let selector_config = self.build_configuration.selector_config();
+            let mut jinja = Jinja::new(selector_config.clone());
+            for (k, v) in self.recipe.context.iter() {
+                jinja
+                    .context_mut()
+                    .insert(k.clone(), Value::from_safe_string(v.clone()));
+            }
+
             cache
                 .build
                 .script()
@@ -193,7 +237,7 @@
                     &self.build_configuration.directories.recipe_dir,
                     &self.build_configuration.directories.host_prefix,
                     Some(&self.build_configuration.directories.build_prefix),
-                    None, // TODO fix this to be proper Jinja context
+                    Some(jinja),
                 )
                 .await
                 .into_diagnostic()?;
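The cache build script now runs with a real Jinja context (previously a `None` with a TODO): each key from the recipe's `context` section is inserted as a safe string, so cache scripts can use the same template variables as regular build scripts. A rough sketch with plain `minijinja`; rattler-build wraps it in its own `Jinja` type, and recipe templates use `${{ ... }}` rather than the bare `{{ ... }}` shown here:

```rust
use minijinja::{Environment, Value};
use std::collections::BTreeMap;

fn main() {
    // Pretend these values came from the recipe's `context:` section.
    let mut ctx: BTreeMap<String, Value> = BTreeMap::new();
    ctx.insert("name".into(), Value::from_safe_string("my-pkg".into()));
    ctx.insert("version".into(), Value::from_safe_string("1.2.3".into()));

    let mut env = Environment::new();
    env.add_template("script", "echo building {{ name }}-{{ version }}")
        .unwrap();
    let rendered = env.get_template("script").unwrap().render(&ctx).unwrap();
    assert_eq!(rendered, "echo building my-pkg-1.2.3");
    println!("{rendered}");
}
```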
@@ -213,6 +257,7 @@
             let mut creation_cache = HashSet::new();
             let mut copied_files = Vec::new();
             let copy_options = CopyOptions::default();
+
             for file in &new_files.new_files {
                 // skip directories (if they are not a symlink)
                 // directories are implicitly created by the files
@@ -224,28 +269,24 @@
                     .expect("File should be in prefix");
                 let dest = &prefix_cache_dir.join(stripped);
                 copy_file(file, dest, &mut creation_cache, &copy_options).into_diagnostic()?;
-
-                // Defend against broken symlinks here!
-                if !file.is_symlink() {
-                    // check if the file contains the prefix
-                    let content_type = content_type(file).into_diagnostic()?;
-                    let has_prefix = if content_type.map(|c| c.is_text()).unwrap_or(false) {
-                        contains_prefix_text(file, self.prefix(), self.target_platform())
-                    } else {
-                        contains_prefix_binary(file, self.prefix())
-                    }
-                    .into_diagnostic()?;
-                    copied_files.push((stripped.to_path_buf(), has_prefix));
-                } else {
-                    copied_files.push((stripped.to_path_buf(), false));
-                }
+                copied_files.push(stripped.to_path_buf());
             }
 
+            // We also need to copy the work dir files to the cache
+            let work_dir_files = CopyDir::new(
+                &self.build_configuration.directories.work_dir.clone(),
+                &cache_dir.join("work_dir"),
+            )
+            .run()
+            .into_diagnostic()?;
+
             // save the cache
             let cache = Cache {
                 requirements: cache.requirements.clone(),
                 finalized_dependencies: finalized_dependencies.clone(),
+                finalized_sources: rendered_sources.clone(),
                 prefix_files: copied_files,
+                work_dir_files: work_dir_files.copied_paths().to_vec(),
                 prefix: self.prefix().to_path_buf(),
             };
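Both saving and restoring now go through `CopyDir`, which copies a tree and reports which paths it copied, so the list (here `work_dir_files`) can be persisted into `cache.json`. A toy stand-in using only the standard library; the real `CopyDir` in `source::copy_dir` also deals with symlinks and copy options, which are omitted here:

```rust
use std::{
    fs, io,
    path::{Path, PathBuf},
};

/// Recursively copy `from` into `to`, returning every file that was copied.
fn copy_dir_recording(from: &Path, to: &Path) -> io::Result<Vec<PathBuf>> {
    let mut copied = Vec::new();
    fs::create_dir_all(to)?;
    for entry in fs::read_dir(from)? {
        let entry = entry?;
        let src = entry.path();
        let dest = to.join(entry.file_name());
        if entry.file_type()?.is_dir() {
            copied.extend(copy_dir_recording(&src, &dest)?);
        } else {
            fs::copy(&src, &dest)?;
            copied.push(dest);
        }
    }
    Ok(copied)
}

fn main() -> io::Result<()> {
    let copied = copy_dir_recording(Path::new("work_dir"), Path::new("cache/work_dir"))?;
    println!("cached {} work dir files", copied.len());
    Ok(())
}
```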

@@ -254,53 +295,11 @@

             Ok(Output {
                 finalized_cache_dependencies: Some(finalized_dependencies),
-                ..self.clone()
+                finalized_cache_sources: Some(rendered_sources),
+                ..self
             })
         } else {
-            Ok(self.clone())
+            Ok(self)
         }
     }
 }
-
-/// Simple replace prefix function that does a direct replacement without any
-/// padding considerations because we know that the prefix is the same length as
-/// the original prefix.
-fn replace_prefix(file: &Path, old_prefix: &Path, new_prefix: &Path) -> Result<(), miette::Error> {
-    // mmap the file, and use the fast string search to find the prefix
-    let output = {
-        let map_file = fs::File::open(file).into_diagnostic()?;
-        let mmap = unsafe { Mmap::map(&map_file).into_diagnostic()? };
-        let new_prefix_bytes = new_prefix.as_os_str().as_encoded_bytes();
-        let old_prefix_bytes = old_prefix.as_os_str().as_encoded_bytes();
-
-        // if the prefix is the same, we don't need to do anything
-        if old_prefix == new_prefix {
-            return Ok(());
-        }
-
-        assert_eq!(
-            new_prefix_bytes.len(),
-            old_prefix_bytes.len(),
-            "Prefixes must have the same length: {:?} != {:?}",
-            new_prefix,
-            old_prefix
-        );
-
-        let mut output = Vec::with_capacity(mmap.len());
-        let mut last_match_end = 0;
-        let finder = memmem::Finder::new(old_prefix_bytes);
-
-        while let Some(index) = finder.find(&mmap[last_match_end..]) {
-            let absolute_index = last_match_end + index;
-            output.extend_from_slice(&mmap[last_match_end..absolute_index]);
-            output.extend_from_slice(new_prefix_bytes);
-            last_match_end = absolute_index + new_prefix_bytes.len();
-        }
-        output.extend_from_slice(&mmap[last_match_end..]);
-        output
-        // close file & mmap at end of scope
-    };
-
-    // overwrite the file
-    fs::write(file, output).into_diagnostic()
-}