Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] preprocessor PoC #198

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/artifacts/solc/src/ast/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{fmt, fmt::Write, str::FromStr};
/// Represents the source location of a node: `<start byte>:<length>:<source index>`.
///
/// The `start`, `length` and `index` can be -1 which is represented as `None`
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SourceLocation {
pub start: Option<usize>,
pub length: Option<usize>,
Expand Down
169 changes: 109 additions & 60 deletions crates/compilers/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::{
buildinfo::RawBuildInfo,
compilers::{Compiler, CompilerSettings, Language},
output::Builds,
preprocessor::interface_representation_hash,
resolver::GraphEdges,
ArtifactFile, ArtifactOutput, Artifacts, ArtifactsMap, Graph, OutputContext, Project,
ProjectPaths, ProjectPathsConfig, SourceCompilationKind,
Expand Down Expand Up @@ -405,6 +406,8 @@ pub struct CacheEntry<S = Settings> {
pub last_modification_date: u64,
/// hash to identify whether the content of the file changed
pub content_hash: String,
/// hash of the interface representation of the file, if it's a source file
pub interface_repr_hash: Option<String>,
/// identifier name see [`foundry_compilers_core::utils::source_name()`]
pub source_name: PathBuf,
/// what config was set when compiling this file
Expand Down Expand Up @@ -620,9 +623,18 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput, C: Compiler> {

/// The file hashes.
pub content_hashes: HashMap<PathBuf, String>,

/// The interface representations for source files.
pub interface_repr_hashes: HashMap<PathBuf, String>,
}

impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
/// Whether the given file is a project source file, as opposed to a test or script file.
fn is_source_file(&self, file: &Path) -> bool {
    let paths = &self.project.paths;
    !(file.starts_with(&paths.tests) || file.starts_with(&paths.scripts))
}

/// Creates a new cache entry for the file
fn create_cache_entry(&mut self, file: PathBuf, source: &Source) {
let imports = self
Expand All @@ -632,10 +644,14 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
.map(|import| strip_prefix(import, self.project.root()).into())
.collect();

let interface_repr_hash =
self.is_source_file(&file).then(|| interface_representation_hash(source));

let entry = CacheEntry {
last_modification_date: CacheEntry::<C::Settings>::read_last_modification_date(&file)
.unwrap_or_default(),
content_hash: source.content_hash(),
interface_repr_hash,
source_name: strip_prefix(&file, self.project.root()).into(),
compiler_settings: self.project.settings.clone(),
imports,
Expand Down Expand Up @@ -730,104 +746,128 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
return true;
}

false
}

// Walks over all cache entries, detects dirty files and removes them from cache.
fn find_and_remove_dirty(&mut self) {
fn populate_dirty_files<D>(
file: &Path,
dirty_files: &mut HashSet<PathBuf>,
edges: &GraphEdges<D>,
) {
for file in edges.importers(file) {
// If file is marked as dirty we either have already visited it or it was marked as
// dirty initially and will be visited at some point later.
if !dirty_files.contains(file) {
dirty_files.insert(file.to_path_buf());
populate_dirty_files(file, dirty_files, edges);
// If any requested extra files are missing for any artifact, mark source as dirty to
// generate them
for artifacts in self.cached_artifacts.values() {
for artifacts in artifacts.values() {
for artifact_file in artifacts {
if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) {
return true;
}
}
}
}

// Iterate over existing cache entries.
let files = self.cache.files.keys().cloned().collect::<HashSet<_>>();
false
}

// Walks over all cache entries, detects dirty files and removes them from cache.
fn find_and_remove_dirty(&mut self) {
let mut sources = Sources::new();

// Read all sources, marking entries as dirty on I/O errors.
for file in &files {
let Ok(source) = Source::read(file) else {
self.dirty_sources.insert(file.clone());
// Read all sources, removing entries on I/O errors.
for file in self.cache.files.keys().cloned().collect::<Vec<_>>() {
let Ok(source) = Source::read(&file) else {
self.cache.files.remove(&file);
continue;
};
sources.insert(file.clone(), source);
}

// Build a temporary graph for walking imports. We need this because `self.edges`
// only contains graph data for in-scope sources but we are operating on cache entries.
if let Ok(graph) = Graph::<C::ParsedSource>::resolve_sources(&self.project.paths, sources) {
let (sources, edges) = graph.into_sources();
// Calculate content hashes for later comparison.
self.fill_hashes(&sources);

// Calculate content hashes for later comparison.
self.fill_hashes(&sources);
// Pre-add all sources that are guaranteed to be dirty
for file in self.cache.files.keys() {
if self.is_dirty_impl(file, false) {
self.dirty_sources.insert(file.clone());
}
}

// Pre-add all sources that are guaranteed to be dirty
for file in sources.keys() {
if self.is_dirty_impl(file) {
// Build a temporary graph for populating cache. We want to ensure that we preserve all just
// removed entries with updated data. We need separate graph for this because
// `self.edges` only contains graph data for in-scope sources but we are operating on cache
// entries.
let Ok(graph) = Graph::<C::ParsedSource>::resolve_sources(&self.project.paths, sources)
else {
// Purge all sources on graph resolution error.
self.cache.files.clear();
return;
};

let (sources, edges) = graph.into_sources();

// Mark sources as dirty based on their imports
for file in sources.keys() {
if self.dirty_sources.contains(file) {
continue;
}
let is_src = self.is_source_file(file);
for import in edges.imports(file) {
// Any source file importing dirty source file is dirty.
if is_src && self.dirty_sources.contains(import) {
self.dirty_sources.insert(file.clone());
break;
// For non-src files we mark them as dirty only if they import dirty non-src file
// or src file for which interface representation changed.
} else if !is_src
&& self.dirty_sources.contains(import)
&& (!self.is_source_file(import) || self.is_dirty_impl(import, true))
{
self.dirty_sources.insert(file.clone());
}
}

// Perform DFS to find direct/indirect importers of dirty files.
for file in self.dirty_sources.clone().iter() {
populate_dirty_files(file, &mut self.dirty_sources, &edges);
}
} else {
// Purge all sources on graph resolution error.
self.dirty_sources.extend(files);
}

// Remove all dirty files from cache.
for file in &self.dirty_sources {
debug!("removing dirty file from cache: {}", file.display());
self.cache.remove(file);
}
}

fn is_dirty_impl(&self, file: &Path) -> bool {
let Some(hash) = self.content_hashes.get(file) else {
trace!("missing content hash");
return true;
};
// Create new entries for all source files
for (file, source) in sources {
if self.cache.files.contains_key(&file) {
continue;
}

self.create_cache_entry(file.clone(), &source);
}
}

/// Returns `true` if the cache entry for `file` can no longer be trusted and the file
/// must be recompiled.
///
/// When `use_interface_repr` is set, the comparison uses the interface representation
/// hash (see `interface_representation_hash`) instead of the full content hash —
/// presumably so that changes that don't affect a source file's interface do not mark
/// its importers dirty; confirm against the preprocessor docs.
fn is_dirty_impl(&self, file: &Path, use_interface_repr: bool) -> bool {
    // No cache entry at all -> must compile.
    let Some(entry) = self.cache.entry(file) else {
        trace!("missing cache entry");
        return true;
    };

    if use_interface_repr {
        // No freshly computed interface hash for this file -> conservatively dirty.
        let Some(interface_hash) = self.interface_repr_hashes.get(file) else {
            trace!("missing interface hash");
            return true;
        };

        // A `None` stored hash (e.g. entry written before this field existed) is
        // treated the same as a mismatch: dirty.
        if entry.interface_repr_hash.as_ref().map_or(true, |h| h != interface_hash) {
            trace!("interface hash changed");
            return true;
        };
    } else {
        // Full-content comparison path.
        let Some(content_hash) = self.content_hashes.get(file) else {
            trace!("missing content hash");
            return true;
        };

        if entry.content_hash != *content_hash {
            trace!("content hash changed");
            return true;
        }
    }

    // Compiler settings incompatible with the cached ones force a recompile.
    if !self.project.settings.can_use_cached(&entry.compiler_settings) {
        trace!("solc config not compatible");
        return true;
    }

    // If any requested extra files are missing for any artifact, mark source as dirty to
    // generate them
    for artifacts in self.cached_artifacts.values() {
        for artifacts in artifacts.values() {
            for artifact_file in artifacts {
                if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) {
                    return true;
                }
            }
        }
    }

    // all things match, can be reused
    false
}
Expand All @@ -838,6 +878,14 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
if let hash_map::Entry::Vacant(entry) = self.content_hashes.entry(file.clone()) {
entry.insert(source.content_hash());
}
// Fill interface representation hashes for source files
if self.is_source_file(&file) {
if let hash_map::Entry::Vacant(entry) =
self.interface_repr_hashes.entry(file.clone())
{
entry.insert(interface_representation_hash(&source));
}
}
}
}
}
Expand Down Expand Up @@ -921,6 +969,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCache<'a, T, C> {
dirty_sources: Default::default(),
content_hashes: Default::default(),
sources_in_scope: Default::default(),
interface_repr_hashes: Default::default(),
};

ArtifactsCache::Cached(cache)
Expand Down
39 changes: 32 additions & 7 deletions crates/compilers/src/compile/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,36 @@ use crate::{
output::{AggregatedCompilerOutput, Builds},
report,
resolver::GraphEdges,
ArtifactOutput, CompilerSettings, Graph, Project, ProjectCompileOutput, Sources,
ArtifactOutput, CompilerSettings, Graph, Project, ProjectCompileOutput, ProjectPathsConfig,
Sources,
};
use foundry_compilers_core::error::Result;
use rayon::prelude::*;
use semver::Version;
use std::{collections::HashMap, path::PathBuf, time::Instant};
use std::{collections::HashMap, fmt::Debug, path::PathBuf, time::Instant};

/// A set of different Solc installations with their version and the sources to be compiled
pub(crate) type VersionedSources<L> = HashMap<L, HashMap<Version, Sources>>;

/// Invoked before the actual compiler invocation and can override the input.
pub trait Preprocessor<C: Compiler>: Debug {
    /// Transforms the compiler `input` before it is handed to `compiler`.
    ///
    /// Returns the (possibly modified) input, or an error if preprocessing fails;
    /// on error the compilation pipeline aborts.
    fn preprocess(
        &self,
        compiler: &C,
        input: C::Input,
        paths: &ProjectPathsConfig<C::Language>,
    ) -> Result<C::Input>;
}

/// Drives compilation of a project's sources using their resolved import graph.
#[derive(Debug)]
pub struct ProjectCompiler<'a, T: ArtifactOutput, C: Compiler> {
    /// Contains the relationship of the source files and their imports
    edges: GraphEdges<C::ParsedSource>,
    /// The project being compiled, borrowed for the duration of the compile run.
    project: &'a Project<C, T>,
    /// how to compile all the sources
    sources: CompilerSources<C::Language>,
    /// Optional preprocessor
    preprocessor: Option<Box<dyn Preprocessor<C>>>,
}

impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> {
Expand Down Expand Up @@ -160,7 +173,11 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> {
sources,
};

Ok(Self { edges, project, sources })
Ok(Self { edges, project, sources, preprocessor: None })
}

/// Sets a [`Preprocessor`] that will run on the compiler input right before the
/// compiler is invoked. Consumes and returns `self` for builder-style chaining.
pub fn with_preprocessor(self, preprocessor: impl Preprocessor<C> + 'static) -> Self {
    let mut this = self;
    this.preprocessor = Some(Box::new(preprocessor));
    this
}

/// Compiles all the sources of the `Project` in the appropriate mode
Expand Down Expand Up @@ -197,7 +214,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> {
/// - check cache
fn preprocess(self) -> Result<PreprocessedState<'a, T, C>> {
trace!("preprocessing");
let Self { edges, project, mut sources } = self;
let Self { edges, project, mut sources, preprocessor } = self;

// convert paths on windows to ensure consistency with the `CompilerOutput` `solc` emits,
// which is unix style `/`
Expand All @@ -207,7 +224,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> {
// retain and compile only dirty sources and all their imports
sources.filter(&mut cache);

Ok(PreprocessedState { sources, cache })
Ok(PreprocessedState { sources, cache, preprocessor })
}
}

Expand All @@ -221,15 +238,18 @@ struct PreprocessedState<'a, T: ArtifactOutput, C: Compiler> {

/// Cache that holds `CacheEntry` objects if caching is enabled and the project is recompiled
cache: ArtifactsCache<'a, T, C>,

/// Optional preprocessor
preprocessor: Option<Box<dyn Preprocessor<C>>>,
}

impl<'a, T: ArtifactOutput, C: Compiler> PreprocessedState<'a, T, C> {
/// advance to the next state by compiling all sources
fn compile(self) -> Result<CompiledState<'a, T, C>> {
trace!("compiling");
let PreprocessedState { sources, mut cache } = self;
let PreprocessedState { sources, mut cache, preprocessor } = self;

let mut output = sources.compile(&mut cache)?;
let mut output = sources.compile(&mut cache, preprocessor)?;

// source paths get stripped before handing them over to solc, so solc never uses absolute
// paths, instead `--base-path <root dir>` is set. this way any metadata that's derived from
Expand Down Expand Up @@ -410,6 +430,7 @@ impl<L: Language> CompilerSources<L> {
fn compile<C: Compiler<Language = L>, T: ArtifactOutput>(
self,
cache: &mut ArtifactsCache<'_, T, C>,
preprocessor: Option<Box<dyn Preprocessor<C>>>,
) -> Result<AggregatedCompilerOutput<C>> {
let project = cache.project();
let graph = cache.graph();
Expand Down Expand Up @@ -456,6 +477,10 @@ impl<L: Language> CompilerSources<L> {

input.strip_prefix(project.paths.root.as_path());

if let Some(preprocessor) = preprocessor.as_ref() {
input = preprocessor.preprocess(&project.compiler, input, &project.paths)?;
}

jobs.push((input, actually_dirty));
}
}
Expand Down
Loading