Skip to content

Commit

Permalink
feat: track and cache context of each compiler invocation (#140)
Browse files Browse the repository at this point in the history
ref foundry-rs/foundry#7379
ref foundry-rs/foundry#4981
ref foundry-rs/foundry#2704

Solidity assigns `source_id`s simply by the order in which sources are
passed in the compiler input. Thus, if we have two sources `A.sol` and
`B.sol`, then on the first (non-cached) compiler run, `A.sol` will be
assigned ID 1 and `B.sol` will be assigned ID 2.

Then, if we change `B.sol` slightly and recompile, it will be the only
source in the input, so it will have ID 1.

Such ID collisions appear even more often in multi-version and
multi-compiler builds. After many cached runs, these discrepancies result
in the debugger displaying essentially random sources.

This PR adds a way to link an artifact to the build info of the input that
produced it, thus allowing us to track the correct `source_id -> source`
mapping for cached artifacts.

I've added `BuildContext`, which is the foundry context we track
for each compiler invocation. It is inlined into `BuildInfo`
and read along with cached artifacts. `BuildContext`s are indexed by the
same IDs as `BuildInfo`s, and each `ArtifactId` now carries the
`build_id` of the build that produced it.

Build info is now produced on every compiler run; however, it does not
include the complete input and output unless `project.build_info` is true,
to avoid overhead while keeping our internal logic working correctly.
  • Loading branch information
klkvr authored Jun 11, 2024
1 parent 2a51897 commit 3ed1e06
Show file tree
Hide file tree
Showing 19 changed files with 431 additions and 184 deletions.
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ allow = [
# https://github.com/briansmith/webpki/issues/148
"LicenseRef-webpki",
"BSL-1.0",
"Unicode-3.0",
]

# Allow 1 or more licenses on a per-crate basis, so that particular licenses
Expand Down
26 changes: 19 additions & 7 deletions src/artifact_output/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::{
BytecodeObject, CompactBytecode, CompactContractBytecodeCow, CompactDeployedBytecode,
FileToContractsMap, SourceFile,
},
cache::CachedArtifact,
compile::output::{contracts::VersionedContracts, sources::VersionedSourceFiles},
error::Result,
sourcemap::{SourceMap, SyntaxError},
Expand Down Expand Up @@ -40,6 +41,8 @@ pub struct ArtifactId {
pub source: PathBuf,
/// `solc` version that produced this artifact
pub version: Version,
/// `solc` build id
pub build_id: String,
}

impl ArtifactId {
Expand Down Expand Up @@ -68,7 +71,7 @@ impl ArtifactId {

/// Returns a `<source path>:<name>` slug that uniquely identifies an artifact
pub fn identifier(&self) -> String {
format!("{}:{}", self.source.to_string_lossy(), self.name)
format!("{}:{}", self.source.display(), self.name)
}

/// Returns a `<filename><version>:<name>` slug that identifies an artifact
Expand All @@ -93,6 +96,7 @@ pub struct ArtifactFile<T> {
pub file: PathBuf,
/// `solc` version that produced this artifact
pub version: Version,
pub build_id: String,
}

impl<T: Serialize> ArtifactFile<T> {
Expand Down Expand Up @@ -274,6 +278,7 @@ impl<T> Artifacts<T> {
name,
source: source.clone(),
version: artifact.version.clone(),
build_id: artifact.build_id.clone(),
}
.with_slashed_paths(),
&artifact.artifact,
Expand All @@ -299,6 +304,7 @@ impl<T> Artifacts<T> {
name,
source: source.clone(),
version: artifact.version,
build_id: artifact.build_id.clone(),
}
.with_slashed_paths(),
artifact.artifact,
Expand Down Expand Up @@ -821,9 +827,9 @@ pub trait ArtifactOutput {
// we reuse the path, this will make sure that even if there are conflicting
// files (files for which `T::output_file()` would return the same path) we use
// consistent output paths
if let Some(existing_artifact) = ctx.existing_artifact(file, name, version).cloned() {
if let Some(existing_artifact) = ctx.existing_artifact(file, name, version) {
trace!("use existing artifact file {:?}", existing_artifact,);
existing_artifact
existing_artifact.to_path_buf()
} else {
let path = if versioned {
Self::output_file_versioned(file, name, version)
Expand Down Expand Up @@ -863,7 +869,7 @@ pub trait ArtifactOutput {
.existing_artifacts
.values()
.flat_map(|artifacts| artifacts.values().flat_map(|artifacts| artifacts.values()))
.map(|p| p.to_slash_lossy().to_lowercase())
.map(|a| a.path.to_slash_lossy().to_lowercase())
.collect::<HashSet<_>>();

let mut files = contracts.keys().collect::<Vec<_>>();
Expand Down Expand Up @@ -911,6 +917,7 @@ pub trait ArtifactOutput {
artifact,
file: artifact_path,
version: contract.version.clone(),
build_id: contract.build_id.clone(),
};

artifacts
Expand Down Expand Up @@ -969,6 +976,7 @@ pub trait ArtifactOutput {
artifact,
file: artifact_path,
version: source.version.clone(),
build_id: source.build_id.clone(),
});
}
}
Expand Down Expand Up @@ -1021,7 +1029,8 @@ pub struct OutputContext<'a> {
/// └── inner
/// └── a.sol
/// ```
pub existing_artifacts: BTreeMap<&'a Path, &'a BTreeMap<String, BTreeMap<Version, PathBuf>>>,
pub existing_artifacts:
BTreeMap<&'a Path, &'a BTreeMap<String, BTreeMap<Version, CachedArtifact>>>,
}

// === impl OutputContext
Expand All @@ -1047,9 +1056,12 @@ impl<'a> OutputContext<'a> {
file: impl AsRef<Path>,
contract: &str,
version: &Version,
) -> Option<&PathBuf> {
) -> Option<&Path> {
self.existing_artifacts.get(file.as_ref()).and_then(|contracts| {
contracts.get(contract).and_then(|versions| versions.get(version))
contracts
.get(contract)
.and_then(|versions| versions.get(version))
.map(|a| a.path.as_path())
})
}
}
Expand Down
21 changes: 18 additions & 3 deletions src/artifacts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1981,7 +1981,14 @@ impl SourceFiles {
#[cfg(test)]
mod tests {
use super::*;
use crate::AggregatedCompilerOutput;
use crate::{
buildinfo::RawBuildInfo,
compilers::{
solc::{SolcCompiler, SolcVersionedInput},
CompilerInput,
},
AggregatedCompilerOutput,
};
use alloy_primitives::Address;

#[test]
Expand Down Expand Up @@ -2014,8 +2021,16 @@ mod tests {
sources: Default::default(),
};

let mut aggregated = AggregatedCompilerOutput::default();
aggregated.extend("0.8.12".parse().unwrap(), out_converted);
let v: Version = "0.8.12".parse().unwrap();
let input = SolcVersionedInput::build(
Default::default(),
Default::default(),
SolcLanguage::Solidity,
v.clone(),
);
let build_info = RawBuildInfo::new(&input, &out_converted, true).unwrap();
let mut aggregated = AggregatedCompilerOutput::<SolcCompiler>::default();
aggregated.extend(v, build_info, out_converted);
assert!(!aggregated.is_unchanged());
}

Expand Down
145 changes: 94 additions & 51 deletions src/buildinfo.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
//! Represents an entire build

use crate::{utils, SolcError};
use crate::{
compilers::{CompilationError, CompilerInput, CompilerOutput, Language},
error::Result,
utils,
};
use alloy_primitives::hex;
use md5::Digest;
use semver::Version;
use serde::{de::DeserializeOwned, ser::SerializeStruct, Deserialize, Serialize, Serializer};
use std::{cell::RefCell, path::Path, rc::Rc};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{
collections::{BTreeMap, HashSet},
path::{Path, PathBuf},
};

pub const ETHERS_FORMAT_VERSION: &str = "ethers-rs-sol-build-info-1";

Expand All @@ -24,93 +31,129 @@ pub struct BuildInfo<I, O> {

impl<I: DeserializeOwned, O: DeserializeOwned> BuildInfo<I, O> {
/// Deserializes the `BuildInfo` object from the given file
pub fn read(path: impl AsRef<Path>) -> Result<Self, SolcError> {
pub fn read(path: impl AsRef<Path>) -> Result<Self> {
utils::read_json_file(path)
}
}

/// Additional context we cache for each compiler run.
///
/// Source ids are assigned by the compiler per invocation, so the same id can refer to
/// different files across (cached) runs. This context keeps the id-to-path mapping of
/// one specific invocation so that it can be looked up again later.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct BuildContext<L> {
/// Mapping from internal compiler source id to path of the source file.
pub source_id_to_path: BTreeMap<u32, PathBuf>,
/// Language of the compiler.
pub language: L,
}

impl<L: Language> BuildContext<L> {
    /// Creates the context for a single compiler invocation.
    ///
    /// Every source reported in `output` whose path was also part of `input` is recorded
    /// as a `source id -> path` entry; output sources that were not part of the input are
    /// ignored. The language is taken from `input`.
    pub fn new<I, E>(input: &I, output: &CompilerOutput<E>) -> Result<Self>
    where
        I: CompilerInput<Language = L>,
    {
        // Paths that were actually handed to the compiler in this invocation.
        let requested: HashSet<_> = input.sources().map(|(path, _)| path).collect();

        let source_id_to_path = output
            .sources
            .iter()
            .filter(|(path, _)| requested.contains(path.as_path()))
            .map(|(path, source)| (source.id, path.to_path_buf()))
            .collect::<BTreeMap<_, _>>();

        Ok(Self { source_id_to_path, language: input.language() })
    }

    /// Rewrites every stored source path in place to `root.join(path)`.
    pub fn join_all(&mut self, root: impl AsRef<Path>) {
        let root = root.as_ref();
        for path in self.source_id_to_path.values_mut() {
            *path = root.join(path.as_path());
        }
    }

    /// Consuming variant of [`Self::join_all`]: joins all paths with `root` and returns
    /// the updated context.
    pub fn with_joined_paths(self, root: impl AsRef<Path>) -> Self {
        let mut joined = self;
        joined.join_all(root);
        joined
    }
}

/// Represents `BuildInfo` object
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct RawBuildInfo {
pub struct RawBuildInfo<L> {
/// The hash that identifies the BuildInfo
pub id: String,
#[serde(flatten)]
pub build_context: BuildContext<L>,
/// serialized `BuildInfo` json
pub build_info: String,
#[serde(flatten)]
pub build_info: BTreeMap<String, serde_json::Value>,
}

// === impl RawBuildInfo ===

impl RawBuildInfo {
impl<L: Language> RawBuildInfo<L> {
/// Serializes a `BuildInfo` object
pub fn new<I: Serialize, O: Serialize>(
pub fn new<I: CompilerInput<Language = L>, E: CompilationError>(
input: &I,
output: &O,
version: &Version,
) -> serde_json::Result<RawBuildInfo> {
output: &CompilerOutput<E>,
full_build_info: bool,
) -> Result<RawBuildInfo<L>> {
let version = input.version().clone();
let build_context = BuildContext::new(input, output)?;

let mut hasher = md5::Md5::new();
let w = BuildInfoWriter { buf: Rc::new(RefCell::new(Vec::with_capacity(128))) };
let mut buf = w.clone();
let mut serializer = serde_json::Serializer::pretty(&mut buf);
let mut s = serializer.serialize_struct("BuildInfo", 6)?;
s.serialize_field("_format", &ETHERS_FORMAT_VERSION)?;

hasher.update(ETHERS_FORMAT_VERSION);

let solc_short = format!("{}.{}.{}", version.major, version.minor, version.patch);
s.serialize_field("solcVersion", &solc_short)?;
s.serialize_field("solcLongVersion", &version)?;
s.serialize_field("input", input)?;
hasher.update(&solc_short);
hasher.update(version.to_string());

let input = serde_json::to_value(input)?;
hasher.update(&serde_json::to_string(&input)?);

// create the hash for `{_format,solcVersion,solcLongVersion,input}`
// N.B. this is not exactly the same as hashing the json representation of these values but
// the most efficient one
hasher.update(&*w.buf.borrow());
let result = hasher.finalize();
let id = hex::encode(result);

s.serialize_field("id", &id)?;
s.serialize_field("output", output)?;
s.end()?;

drop(buf);
let mut build_info = BTreeMap::new();

let build_info = unsafe {
// serde_json does not emit non UTF8
String::from_utf8_unchecked(w.buf.take())
};
if full_build_info {
build_info.insert("_format".to_string(), serde_json::to_value(ETHERS_FORMAT_VERSION)?);
build_info.insert("solcVersion".to_string(), serde_json::to_value(&solc_short)?);
build_info.insert("solcLongVersion".to_string(), serde_json::to_value(&version)?);
build_info.insert("input".to_string(), input);
build_info.insert("output".to_string(), serde_json::to_value(output)?);
}

Ok(RawBuildInfo { id, build_info })
}
}

#[derive(Clone)]
struct BuildInfoWriter {
buf: Rc<RefCell<Vec<u8>>>,
}

impl std::io::Write for BuildInfoWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.buf.borrow_mut().write(buf)
}

fn flush(&mut self) -> std::io::Result<()> {
self.buf.borrow_mut().flush()
Ok(RawBuildInfo { id, build_info, build_context })
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::{artifacts::Error, compilers::CompilerOutput, SolcInput, Source};
use crate::{
artifacts::Error,
compilers::{
solc::{SolcLanguage, SolcVersionedInput},
CompilerOutput,
},
Source,
};
use std::{collections::BTreeMap, path::PathBuf};

#[test]
fn build_info_serde() {
let inputs = SolcInput::resolve_and_build(
let v: Version = "0.8.4+commit.c7e474f2".parse().unwrap();
let input = SolcVersionedInput::build(
BTreeMap::from([(PathBuf::from("input.sol"), Source::new(""))]),
Default::default(),
SolcLanguage::Solidity,
v,
);
let output = CompilerOutput::<Error>::default();
let v: Version = "0.8.4+commit.c7e474f2".parse().unwrap();
let raw_info = RawBuildInfo::new(&inputs[0], &output, &v).unwrap();
let _info: BuildInfo<SolcInput, CompilerOutput<Error>> =
serde_json::from_str(&raw_info.build_info).unwrap();
let raw_info = RawBuildInfo::new(&input, &output, true).unwrap();
let _info: BuildInfo<SolcVersionedInput, CompilerOutput<Error>> =
serde_json::from_str(&serde_json::to_string(&raw_info).unwrap()).unwrap();
}
}
Loading

0 comments on commit 3ed1e06

Please sign in to comment.