From edded24d2256a074e8e390285e123e39f926551d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Rodr=C3=ADguez?= Date: Mon, 14 Aug 2023 17:01:17 +0200 Subject: [PATCH] feat: optionally output a debug artifact on compile (#2260) Co-authored-by: Tom French <15848336+TomAFrench@users.noreply.github.com> --- crates/fm/src/file_map.rs | 4 +- crates/fm/src/lib.rs | 2 +- crates/nargo/src/artifacts/debug.rs | 50 ++++++++++ crates/nargo/src/artifacts/mod.rs | 1 + crates/nargo/src/ops/preprocess.rs | 23 +++-- crates/nargo_cli/src/cli/compile_cmd.rs | 97 +++++++++++++------ crates/nargo_cli/src/cli/fs/program.rs | 19 +++- crates/noirc_driver/src/contract.rs | 3 + crates/noirc_driver/src/lib.rs | 1 + crates/noirc_errors/src/debug_info.rs | 8 +- .../ssa/acir_gen/acir_ir/generated_acir.rs | 4 +- 11 files changed, 162 insertions(+), 50 deletions(-) create mode 100644 crates/nargo/src/artifacts/debug.rs diff --git a/crates/fm/src/file_map.rs b/crates/fm/src/file_map.rs index 8bbfcf99dd7..22ac6d4e179 100644 --- a/crates/fm/src/file_map.rs +++ b/crates/fm/src/file_map.rs @@ -34,7 +34,9 @@ impl From<&PathBuf> for PathString { pub struct FileMap(SimpleFiles); // XXX: Note that we derive Default here due to ModuleOrigin requiring us to set a FileId -#[derive(Default, Debug, Clone, PartialEq, Eq, Copy, Hash, Serialize, Deserialize)] +#[derive( + Default, Debug, Clone, PartialEq, Eq, Copy, Hash, Serialize, Deserialize, PartialOrd, Ord, +)] pub struct FileId(usize); impl FileId { diff --git a/crates/fm/src/lib.rs b/crates/fm/src/lib.rs index 4c2ce39dd40..ef976a80dbd 100644 --- a/crates/fm/src/lib.rs +++ b/crates/fm/src/lib.rs @@ -70,7 +70,7 @@ impl FileManager { assert!(old_value.is_none(), "ice: the same path was inserted into the file manager twice"); } - pub fn fetch_file(&mut self, file_id: FileId) -> File { + pub fn fetch_file(&self, file_id: FileId) -> File { // Unwrap as we ensure that all file_id's map to a corresponding file in the file map self.file_map.get_file(file_id).unwrap() } diff --git a/crates/nargo/src/artifacts/debug.rs b/crates/nargo/src/artifacts/debug.rs new file mode 100644 index 00000000000..31c15311f06 --- /dev/null +++ b/crates/nargo/src/artifacts/debug.rs @@ -0,0 +1,50 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::{BTreeMap, BTreeSet}, + path::PathBuf, +}; + +use fm::FileId; +use noirc_errors::debug_info::DebugInfo; +use noirc_frontend::hir::Context; + +/// For a given file, we store the source code and the path to the file +/// so consumers of the debug artifact can reconstruct the original source code structure. +#[derive(Debug, Serialize, Deserialize)] +pub struct DebugFile { + pub source: String, + pub path: PathBuf, +} + +/// A Debug Artifact stores, for a given program, the debug info for every function +/// along with a map of file Id to the source code so locations in debug info can be mapped to source code they point to. +#[derive(Debug, Serialize, Deserialize)] +pub struct DebugArtifact { + pub debug_symbols: Vec, + pub file_map: BTreeMap, +} + +impl DebugArtifact { + pub fn new(debug_symbols: Vec, compilation_context: &Context) -> Self { + let mut file_map = BTreeMap::new(); + + let files_with_debug_symbols: BTreeSet = debug_symbols + .iter() + .flat_map(|function_symbols| function_symbols.locations.values().map(|loc| loc.file)) + .collect(); + + for file_id in files_with_debug_symbols { + let file_source = compilation_context.file_manager.fetch_file(file_id).source(); + + file_map.insert( + file_id, + DebugFile { + source: file_source.to_string(), + path: compilation_context.file_manager.path(file_id).to_path_buf(), + }, + ); + } + + Self { debug_symbols, file_map } + } +} diff --git a/crates/nargo/src/artifacts/mod.rs b/crates/nargo/src/artifacts/mod.rs index 1ba12c49af7..33311e0856e 100644 --- a/crates/nargo/src/artifacts/mod.rs +++ b/crates/nargo/src/artifacts/mod.rs @@ -8,6 +8,7 @@ use base64::Engine; use serde::{Deserializer, Serializer}; pub mod contract; +pub mod debug; pub mod program; // TODO: move these down into ACVM. diff --git a/crates/nargo/src/ops/preprocess.rs b/crates/nargo/src/ops/preprocess.rs index d07da256ede..0ee4e2590f9 100644 --- a/crates/nargo/src/ops/preprocess.rs +++ b/crates/nargo/src/ops/preprocess.rs @@ -42,7 +42,7 @@ pub fn preprocess_contract_function( include_keys: bool, common_reference_string: &[u8], func: ContractFunction, -) -> Result { +) -> Result<(PreprocessedContractFunction, DebugInfo), B::Error> { // TODO: currently `func`'s bytecode is already optimized for the backend. // In future we'll need to apply those optimizations here. let optimized_bytecode = func.bytecode; @@ -54,14 +54,17 @@ pub fn preprocess_contract_function( (None, None) }; - Ok(PreprocessedContractFunction { - name: func.name, - function_type: func.function_type, - is_internal: func.is_internal, - abi: func.abi, + Ok(( + PreprocessedContractFunction { + name: func.name, + function_type: func.function_type, + is_internal: func.is_internal, + abi: func.abi, - bytecode: optimized_bytecode, - proving_key, - verification_key, - }) + bytecode: optimized_bytecode, + proving_key, + verification_key, + }, + func.debug, + )) } diff --git a/crates/nargo_cli/src/cli/compile_cmd.rs b/crates/nargo_cli/src/cli/compile_cmd.rs index fad9783e654..2a6f802f748 100644 --- a/crates/nargo_cli/src/cli/compile_cmd.rs +++ b/crates/nargo_cli/src/cli/compile_cmd.rs @@ -2,14 +2,16 @@ use acvm::acir::circuit::OpcodeLabel; use acvm::{acir::circuit::Circuit, Backend}; use iter_extended::try_vecmap; use iter_extended::vecmap; +use nargo::artifacts::debug::DebugArtifact; use nargo::package::Package; use nargo::prepare_package; use nargo::{artifacts::contract::PreprocessedContract, NargoError}; use nargo_toml::{find_package_manifest, resolve_workspace_from_toml}; use noirc_driver::{ compile_contracts, compile_main, CompileOptions, CompiledContract, CompiledProgram, - ContractFunction, ErrorsAndWarnings, Warnings, + ErrorsAndWarnings, Warnings, }; +use noirc_errors::debug_info::DebugInfo; use noirc_frontend::graph::CrateName; use noirc_frontend::hir::Context; @@ -19,6 +21,7 @@ use nargo::ops::{preprocess_contract_function, preprocess_program}; use crate::errors::{CliError, CompileError}; +use super::fs::program::save_debug_artifact_to_file; use super::fs::{ common_reference_string::{ read_cached_common_reference_string, update_common_reference_string, @@ -38,6 +41,10 @@ pub(crate) struct CompileCommand { #[arg(long)] include_keys: bool, + /// Output debug files + #[arg(long, hide = true)] + output_debug: bool, + /// The name of the package to compile #[clap(long)] package: Option, @@ -70,49 +77,72 @@ pub(crate) fn run( // As can be seen here, It seems like a leaky abstraction where ContractFunctions (essentially CompiledPrograms) // are compiled via nargo-core and then the PreprocessedContract is constructed here. // This is due to EACH function needing it's own CRS, PKey, and VKey from the backend. - let preprocessed_contracts: Result, CliError> = - try_vecmap(optimized_contracts, |contract| { - let preprocessed_contract_functions = try_vecmap(contract.functions, |func| { - common_reference_string = update_common_reference_string( - backend, - &common_reference_string, - &func.bytecode, - ) - .map_err(CliError::CommonReferenceStringError)?; - - preprocess_contract_function( - backend, - args.include_keys, - &common_reference_string, - func, - ) - .map_err(CliError::ProofSystemCompilerError) - })?; - - Ok(PreprocessedContract { + let preprocessed_contracts: Result< + Vec<(PreprocessedContract, Vec)>, + CliError, + > = try_vecmap(optimized_contracts, |contract| { + let preprocess_result = try_vecmap(contract.functions, |func| { + common_reference_string = update_common_reference_string( + backend, + &common_reference_string, + &func.bytecode, + ) + .map_err(CliError::CommonReferenceStringError)?; + + preprocess_contract_function( + backend, + args.include_keys, + &common_reference_string, + func, + ) + .map_err(CliError::ProofSystemCompilerError) + })?; + + let (preprocessed_contract_functions, debug_infos): (Vec<_>, Vec<_>) = + preprocess_result.into_iter().unzip(); + + Ok(( + PreprocessedContract { name: contract.name, backend: String::from(BACKEND_IDENTIFIER), functions: preprocessed_contract_functions, - }) - }); - for contract in preprocessed_contracts? { + }, + debug_infos, + )) + }); + for (contract, debug_infos) in preprocessed_contracts? { save_contract_to_file( &contract, &format!("{}-{}", package.name, contract.name), &circuit_dir, ); + + if args.output_debug { + let debug_artifact = DebugArtifact::new(debug_infos, &context); + save_debug_artifact_to_file( + &debug_artifact, + &format!("{}-{}", package.name, contract.name), + &circuit_dir, + ); + } } } else { - let (_, program) = compile_package(backend, package, &args.compile_options)?; + let (context, program) = compile_package(backend, package, &args.compile_options)?; common_reference_string = update_common_reference_string(backend, &common_reference_string, &program.circuit) .map_err(CliError::CommonReferenceStringError)?; - let (preprocessed_program, _) = + let (preprocessed_program, debug_info) = preprocess_program(backend, args.include_keys, &common_reference_string, program) .map_err(CliError::ProofSystemCompilerError)?; save_program_to_file(&preprocessed_program, &package.name, &circuit_dir); + + if args.output_debug { + let debug_artifact = DebugArtifact::new(vec![debug_info], &context); + let circuit_name: String = (&package.name).into(); + save_debug_artifact_to_file(&debug_artifact, &circuit_name, &circuit_dir); + } } } @@ -167,9 +197,18 @@ pub(super) fn optimize_contract( backend: &B, contract: CompiledContract, ) -> Result> { - let functions = try_vecmap(contract.functions, |func| { - let optimized_bytecode = optimize_circuit(backend, func.bytecode)?.0; - Ok::<_, CliError>(ContractFunction { bytecode: optimized_bytecode, ..func }) + let functions = try_vecmap(contract.functions, |mut func| { + let (optimized_bytecode, opcode_labels) = optimize_circuit(backend, func.bytecode)?; + let opcode_ids = vecmap(opcode_labels, |label| match label { + OpcodeLabel::Unresolved => { + unreachable!("Compiled circuit opcodes must resolve to some index") + } + OpcodeLabel::Resolved(index) => index as usize, + }); + + func.bytecode = optimized_bytecode; + func.debug.update_acir(opcode_ids); + Ok::<_, CliError>(func) })?; Ok(CompiledContract { functions, ..contract }) diff --git a/crates/nargo_cli/src/cli/fs/program.rs b/crates/nargo_cli/src/cli/fs/program.rs index 311923a6686..3f7107de667 100644 --- a/crates/nargo_cli/src/cli/fs/program.rs +++ b/crates/nargo_cli/src/cli/fs/program.rs @@ -1,6 +1,8 @@ use std::path::{Path, PathBuf}; -use nargo::artifacts::{contract::PreprocessedContract, program::PreprocessedProgram}; +use nargo::artifacts::{ + contract::PreprocessedContract, debug::DebugArtifact, program::PreprocessedProgram, +}; use noirc_frontend::graph::CrateName; use crate::errors::FilesystemError; @@ -15,6 +17,7 @@ pub(crate) fn save_program_to_file>( let circuit_name: String = crate_name.into(); save_build_artifact_to_file(compiled_program, &circuit_name, circuit_dir) } + pub(crate) fn save_contract_to_file>( compiled_contract: &PreprocessedContract, circuit_name: &str, @@ -22,13 +25,23 @@ pub(crate) fn save_contract_to_file>( ) -> PathBuf { save_build_artifact_to_file(compiled_contract, circuit_name, circuit_dir) } + +pub(crate) fn save_debug_artifact_to_file>( + debug_artifact: &DebugArtifact, + circuit_name: &str, + circuit_dir: P, +) -> PathBuf { + let artifact_name = format!("debug_{}", circuit_name); + save_build_artifact_to_file(debug_artifact, &artifact_name, circuit_dir) +} + fn save_build_artifact_to_file, T: ?Sized + serde::Serialize>( build_artifact: &T, - circuit_name: &str, + artifact_name: &str, circuit_dir: P, ) -> PathBuf { create_named_dir(circuit_dir.as_ref(), "target"); - let circuit_path = circuit_dir.as_ref().join(circuit_name).with_extension("json"); + let circuit_path = circuit_dir.as_ref().join(artifact_name).with_extension("json"); write_to_file(&serde_json::to_vec(build_artifact).unwrap(), &circuit_path); diff --git a/crates/noirc_driver/src/contract.rs b/crates/noirc_driver/src/contract.rs index a25d258c9be..a1820ff2e47 100644 --- a/crates/noirc_driver/src/contract.rs +++ b/crates/noirc_driver/src/contract.rs @@ -1,6 +1,7 @@ use crate::program::{deserialize_circuit, serialize_circuit}; use acvm::acir::circuit::Circuit; use noirc_abi::Abi; +use noirc_errors::debug_info::DebugInfo; use serde::{Deserialize, Serialize}; /// Describes the types of smart contract functions that are allowed. @@ -47,6 +48,8 @@ pub struct ContractFunction { #[serde(serialize_with = "serialize_circuit", deserialize_with = "deserialize_circuit")] pub bytecode: Circuit, + + pub debug: DebugInfo, } impl ContractFunctionType { diff --git a/crates/noirc_driver/src/lib.rs b/crates/noirc_driver/src/lib.rs index 2919d2fd3ea..0e6a3c519b5 100644 --- a/crates/noirc_driver/src/lib.rs +++ b/crates/noirc_driver/src/lib.rs @@ -249,6 +249,7 @@ fn compile_contract( is_internal: func_meta.is_internal.unwrap_or(false), abi: function.abi, bytecode: function.circuit, + debug: function.debug, }); } diff --git a/crates/noirc_errors/src/debug_info.rs b/crates/noirc_errors/src/debug_info.rs index 3217dba7a38..0c05bee61d5 100644 --- a/crates/noirc_errors/src/debug_info.rs +++ b/crates/noirc_errors/src/debug_info.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::BTreeMap; use crate::Location; use serde::{Deserialize, Serialize}; @@ -6,11 +6,11 @@ use serde::{Deserialize, Serialize}; #[derive(Default, Debug, Clone, Deserialize, Serialize)] pub struct DebugInfo { /// Map opcode index of an ACIR circuit into the source code location - pub locations: HashMap, + pub locations: BTreeMap, } impl DebugInfo { - pub fn new(locations: HashMap) -> Self { + pub fn new(locations: BTreeMap) -> Self { DebugInfo { locations } } @@ -24,7 +24,7 @@ impl DebugInfo { /// This is the case during fallback or width 'optimization' /// opcode_indices is this list of mixed indices pub fn update_acir(&mut self, opcode_indices: Vec) { - let mut new_locations = HashMap::new(); + let mut new_locations = BTreeMap::new(); for (i, idx) in opcode_indices.iter().enumerate() { if self.locations.contains_key(idx) { new_locations.insert(i, self.locations[idx]); diff --git a/crates/noirc_evaluator/src/ssa/acir_gen/acir_ir/generated_acir.rs b/crates/noirc_evaluator/src/ssa/acir_gen/acir_ir/generated_acir.rs index b425eab42d3..fd88921bb3d 100644 --- a/crates/noirc_evaluator/src/ssa/acir_gen/acir_ir/generated_acir.rs +++ b/crates/noirc_evaluator/src/ssa/acir_gen/acir_ir/generated_acir.rs @@ -1,6 +1,6 @@ //! `GeneratedAcir` is constructed as part of the `acir_gen` pass to accumulate all of the ACIR //! program as it is being converted from SSA form. -use std::collections::HashMap; +use std::collections::BTreeMap; use crate::{ brillig::brillig_gen::brillig_directive, @@ -46,7 +46,7 @@ pub(crate) struct GeneratedAcir { pub(crate) input_witnesses: Vec, /// Correspondance between an opcode index (in opcodes) and the source code location which generated it - pub(crate) locations: HashMap, + pub(crate) locations: BTreeMap, /// Source code location of the current instruction being processed /// None if we do not know the location