From f802a07ebb8a7b146d2a8e8e2a26cf538398c072 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sun, 30 Jun 2024 18:27:18 +0200 Subject: [PATCH] perf: parse in parallel --- .github/workflows/ci.yml | 6 ++++ Cargo.lock | 1 + crates/ast/src/ast/semver.rs | 6 ++-- crates/data-structures/src/map.rs | 2 +- crates/data-structures/src/scope.rs | 4 +-- crates/interface/Cargo.toml | 1 + crates/interface/src/globals.rs | 7 ---- crates/interface/src/session.rs | 7 ++++ crates/interface/src/source_map/mod.rs | 5 ++- crates/interface/src/symbol.rs | 14 +++----- crates/macros/src/symbols/mod.rs | 2 ++ crates/parse/src/parser/mod.rs | 2 +- crates/sema/src/lib.rs | 48 +++++++++++++++++++++++++- crates/sulk/src/main.rs | 5 +-- crates/sulk/src/utils.rs | 10 +++--- 15 files changed, 85 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebdf8f6e..baf1cc96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,7 @@ jobs: fail-fast: false matrix: include: + - rust: 1.74 # MSRV - rust: stable - rust: nightly - rust: nightly @@ -35,7 +36,12 @@ jobs: - uses: Swatinem/rust-cache@v2 with: cache-on-failure: true + # Only run tests on latest stable and above + - name: build + if: ${{ matrix.rust == '1.74' }} # MSRV + run: cargo build ${{ matrix.flags }} - name: test + if: ${{ matrix.rust != '1.74' }} # MSRV run: cargo test --workspace ${{ matrix.flags }} clippy: diff --git a/Cargo.lock b/Cargo.lock index de7b7053..e607e404 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2345,6 +2345,7 @@ dependencies = [ "anstyle", "bumpalo", "itertools 0.13.0", + "itoa", "match_cfg", "md-5", "normalize-path", diff --git a/crates/ast/src/ast/semver.rs b/crates/ast/src/ast/semver.rs index 6d5c0215..551a6618 100644 --- a/crates/ast/src/ast/semver.rs +++ b/crates/ast/src/ast/semver.rs @@ -56,8 +56,7 @@ impl PartialEq for SemverVersionNumber { #[inline] fn eq(&self, other: &Self) -> bool { match (self, other) { - (Self::Wildcard, _) => true, - (_, Self::Wildcard) => true, + (Self::Wildcard, _) | (_, Self::Wildcard) => true, (Self::Number(a), Self::Number(b)) => a == b, } } @@ -76,8 +75,7 @@ impl Ord for SemverVersionNumber { #[inline] fn cmp(&self, other: &Self) -> Ordering { match (self, other) { - (Self::Wildcard, _) => Ordering::Equal, - (_, Self::Wildcard) => Ordering::Equal, + (Self::Wildcard, _) | (_, Self::Wildcard) => Ordering::Equal, (Self::Number(a), Self::Number(b)) => a.cmp(b), } } diff --git a/crates/data-structures/src/map.rs b/crates/data-structures/src/map.rs index 5d48da01..6afb75d1 100644 --- a/crates/data-structures/src/map.rs +++ b/crates/data-structures/src/map.rs @@ -12,7 +12,7 @@ use std::{ }; pub use ahash::{self, AHasher}; -pub use rustc_hash::{self, FxHasher}; +pub use rustc_hash::{self, FxBuildHasher, FxHasher}; /// [`HashMap`] entry type. pub type StdEntry<'a, K, V> = std::collections::hash_map::Entry<'a, K, V>; diff --git a/crates/data-structures/src/scope.rs b/crates/data-structures/src/scope.rs index e52c4cbb..021b69f2 100644 --- a/crates/data-structures/src/scope.rs +++ b/crates/data-structures/src/scope.rs @@ -1,6 +1,6 @@ use std::{ - collections::HashMap, - hash::{BuildHasher, Hash, RandomState}, + collections::{hash_map::RandomState, HashMap}, + hash::{BuildHasher, Hash}, }; /// A single scope. diff --git a/crates/interface/Cargo.toml b/crates/interface/Cargo.toml index 3af13845..ab36bf02 100644 --- a/crates/interface/Cargo.toml +++ b/crates/interface/Cargo.toml @@ -26,6 +26,7 @@ sulk-macros.workspace = true bumpalo.workspace = true itertools.workspace = true +itoa = "1.0" match_cfg.workspace = true md-5.workspace = true normalize-path = "0.2.1" diff --git a/crates/interface/src/globals.rs b/crates/interface/src/globals.rs index 2e7f98f3..3e2e977a 100644 --- a/crates/interface/src/globals.rs +++ b/crates/interface/src/globals.rs @@ -43,13 +43,6 @@ impl SessionGlobals { SESSION_GLOBALS.set(self, f) } - /// Clears the newly-interned symbols, keeping the pre-interned ones. - /// - /// NOTE: This invalidates all `Symbol` values interned. Use with care. - pub fn unchecked_clear_interned_symbols(&self) { - self.symbol_interner.freshen(); - } - /// Insert `source_map` into the session globals for the duration of the /// closure's execution. pub fn with_source_map(source_map: Lrc, f: impl FnOnce() -> R) -> R { diff --git a/crates/interface/src/session.rs b/crates/interface/src/session.rs index 421dd744..3b652a83 100644 --- a/crates/interface/src/session.rs +++ b/crates/interface/src/session.rs @@ -1,4 +1,5 @@ use crate::{diagnostics::DiagCtxt, ColorChoice, SourceMap}; +use std::num::NonZeroUsize; use sulk_config::{EvmVersion, Language, StopAfter}; use sulk_data_structures::sync::Lrc; @@ -9,9 +10,14 @@ pub struct Session { /// The source map. source_map: Lrc, + /// EVM version. pub evm_version: EvmVersion, + /// Source code language. pub language: Language, + /// Stop execution after the given compiler stage. pub stop_after: Option, + /// Number of threads to use. Already resolved to a non-zero value. + pub jobs: NonZeroUsize, } impl Session { @@ -23,6 +29,7 @@ impl Session { evm_version: EvmVersion::default(), language: Language::default(), stop_after: None, + jobs: NonZeroUsize::MIN, } } diff --git a/crates/interface/src/source_map/mod.rs b/crates/interface/src/source_map/mod.rs index d8880cdd..423d32c1 100644 --- a/crates/interface/src/source_map/mod.rs +++ b/crates/interface/src/source_map/mod.rs @@ -193,6 +193,8 @@ impl SourceMap { file_id: StableSourceFileId, mut file: SourceFile, ) -> Result, OffsetOverflowError> { + debug!(name=%file.name.display(), len=file.src.len(), "adding to source map"); + let mut files = self.files.write(); file.start_pos = BytePos(if let Some(last_file) = files.source_files.last() { @@ -281,12 +283,9 @@ impl SourceMap { }) } - // #[instrument(skip(self), level = "trace")] pub fn is_valid_span(&self, sp: Span) -> Result<(Loc, Loc), SpanLinesError> { let lo = self.lookup_char_pos(sp.lo()); - // trace!(?lo); let hi = self.lookup_char_pos(sp.hi()); - // trace!(?hi); if lo.file.start_pos != hi.file.start_pos { return Err(SpanLinesError::DistinctSources(Box::new(DistinctSources { begin: (lo.file.name.clone(), lo.file.start_pos), diff --git a/crates/interface/src/symbol.rs b/crates/interface/src/symbol.rs index 46521256..b3231107 100644 --- a/crates/interface/src/symbol.rs +++ b/crates/interface/src/symbol.rs @@ -362,10 +362,6 @@ impl Interner { Self(Lock::new(InternerInner::prefill(init))) } - pub(crate) fn freshen(&self) { - *self.0.lock() = InternerInner::fresh(); - } - #[inline] fn intern(&self, string: &str) -> Symbol { let mut inner = self.0.lock(); @@ -472,13 +468,11 @@ pub mod sym { /// Get the symbol for an integer. /// /// The first few non-negative integers each have a static symbol and therefore are fast. - pub fn integer + Copy + ToString>(n: N) -> Symbol { - if let Ok(idx) = n.try_into() { - if idx < 10 { - return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32); - } + pub fn integer + Copy + itoa::Integer>(n: N) -> Symbol { + if let Ok(idx @ 0..=9) = n.try_into() { + return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32); } - Symbol::intern(&n.to_string()) + Symbol::intern(itoa::Buffer::new().format(n)) } } diff --git a/crates/macros/src/symbols/mod.rs b/crates/macros/src/symbols/mod.rs index 12646ae6..f1cb6466 100644 --- a/crates/macros/src/symbols/mod.rs +++ b/crates/macros/src/symbols/mod.rs @@ -299,9 +299,11 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let symbol_digits_base = entries.map["0"].idx; let preinterned_symbols_count = entries.len(); + let preinterned_symbols_bytes = entries.map.keys().map(String::len).sum::() as u64; let output = quote! { const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base; const PREINTERNED_SYMBOLS_COUNT: u32 = #preinterned_symbols_count; + const PREINTERNED_SYMBOLS_BYTES: u64 = #preinterned_symbols_bytes; #[allow(non_upper_case_globals)] #[doc(hidden)] diff --git a/crates/parse/src/parser/mod.rs b/crates/parse/src/parser/mod.rs index d24568dc..bbfbf48c 100644 --- a/crates/parse/src/parser/mod.rs +++ b/crates/parse/src/parser/mod.rs @@ -316,7 +316,7 @@ impl<'a> Parser<'a> { format!("expected {expect}, found {actual}"), (self.prev_token.span.shrink_to_hi(), format!("expected {expect}")), ), - len @ 2.. => { + len => { let fmt = format!("expected one of {expect}, found {actual}"); let short_expect = if len > 6 { format!("{len} possible tokens") } else { expect }; let s = self.prev_token.span.shrink_to_hi(); diff --git a/crates/sema/src/lib.rs b/crates/sema/src/lib.rs index ae2fe967..f626da49 100644 --- a/crates/sema/src/lib.rs +++ b/crates/sema/src/lib.rs @@ -152,19 +152,64 @@ impl<'a> Resolver<'a> { #[instrument(level = "debug", skip_all)] fn parse(&mut self) { let mut sources = std::mem::take(&mut self.sources); + if self.sess.jobs.get() == 1 { + self.parse_sequential(&mut sources); + } else { + self.parse_parallel(&mut sources); + } + self.sources = sources; + } + + fn parse_sequential(&self, sources: &mut Sources) { for i in 0.. { let current_file = SourceId::from_usize(i); let Some(source) = sources.get(current_file) else { break }; debug_assert!(source.ast.is_none(), "file already parsed"); let ast = self.parse_one(&source.file); + let n_sources = sources.0.len(); for import in self.resolve_imports(&source.file, ast.as_ref()) { let import = sources.add_file(import); sources.push_import(current_file, import); } + let new_files = sources.0.len() - n_sources; + if new_files > 0 { + trace!(new_files); + } sources.0[current_file].ast = ast; } - self.sources = sources; + } + + fn parse_parallel(&self, sources: &mut Sources) { + let mut start = 0; + loop { + let base = start; + let Some(to_parse) = sources.0.raw.get_mut(start..) else { break }; + if to_parse.is_empty() { + break; + } + trace!(start, "parsing {} files", to_parse.len()); + start += to_parse.len(); + let imports = to_parse + .par_iter_mut() + .enumerate() + .flat_map_iter(|(i, source)| { + debug_assert!(source.ast.is_none(), "source already parsed"); + source.ast = self.parse_one(&source.file); + self.resolve_imports(&source.file, source.ast.as_ref()) + .map(move |import| (i, import)) + }) + .collect_vec_list(); + let n_sources = sources.0.len(); + for (i, import) in imports.into_iter().flatten() { + let import_id = sources.add_file(import); + sources.push_import(SourceId::from_usize(base + i), import_id); + } + let new_files = sources.0.len() - n_sources; + if new_files > 0 { + trace!(new_files); + } + } } /// Parses a single file. @@ -213,6 +258,7 @@ impl<'a> Resolver<'a> { }) } + /// Performs [AST validation](AstValidator) on all ASTs, in parallel. #[instrument(level = "debug", skip_all)] fn validate_asts(&self) { self.sources.par_asts().for_each(|ast| AstValidator::validate(self.sess, ast)); diff --git a/crates/sulk/src/main.rs b/crates/sulk/src/main.rs index d9b3f5ce..4ebbd1ba 100644 --- a/crates/sulk/src/main.rs +++ b/crates/sulk/src/main.rs @@ -4,7 +4,7 @@ use clap::Parser as _; use cli::Args; -use std::{path::Path, process::ExitCode}; +use std::{num::NonZeroUsize, path::Path, process::ExitCode}; use sulk_data_structures::sync::Lrc; use sulk_interface::{ diagnostics::{DiagCtxt, DynEmitter, HumanEmitter, JsonEmitter}, @@ -118,7 +118,7 @@ impl Compiler { } fn run_compiler_with(args: Args, f: impl FnOnce(&Compiler) -> Result + Send) -> Result { - utils::run_in_thread_pool_with_globals(args.threads, || { + utils::run_in_thread_pool_with_globals(args.threads, |jobs| { let ui_testing = args.unstable.ui_testing; let source_map = Lrc::new(SourceMap::empty()); let emitter: Box = match args.error_format { @@ -151,6 +151,7 @@ fn run_compiler_with(args: Args, f: impl FnOnce(&Compiler) -> Result + Send) -> sess.evm_version = args.evm_version; sess.language = args.language; sess.stop_after = args.stop_after; + sess.jobs = NonZeroUsize::new(jobs).unwrap(); let compiler = Compiler { sess, args }; diff --git a/crates/sulk/src/utils.rs b/crates/sulk/src/utils.rs index 31ead4c9..deeafbe8 100644 --- a/crates/sulk/src/utils.rs +++ b/crates/sulk/src/utils.rs @@ -26,8 +26,8 @@ fn try_init_logger() -> std::result::Result<(), impl std::fmt::Display> { pub(crate) fn install_panic_hook() { // If the user has not explicitly overridden "RUST_BACKTRACE", then produce full backtraces. - // When a compiler ICE happens, we want to gather as much information as possible - // to present in the issue opened by the user. + // When a compiler ICE happens, we want to gather as much information as possible to present in + // the issue opened by the user. if std::env::var_os("RUST_BACKTRACE").is_none() { std::env::set_var("RUST_BACKTRACE", "full"); } @@ -66,10 +66,12 @@ fn panic_hook(info: &PanicInfo<'_>) { pub(crate) fn run_in_thread_pool_with_globals( threads: usize, - f: impl FnOnce() -> R + Send, + f: impl FnOnce(usize) -> R + Send, ) -> R { let mut builder = rayon::ThreadPoolBuilder::new().thread_name(|i| format!("sulk-{i}")).num_threads(threads); + // We still want to use a rayon thread pool with 1 thread so that `ParallelIterator` don't + // install their own thread pool. if threads == 1 { builder = builder.use_current_thread(); } @@ -84,7 +86,7 @@ pub(crate) fn run_in_thread_pool_with_globals( // Initialize each new worker thread when created. move |thread| session_globals.set(|| thread.run()), // Run `f` on the first thread in the thread pool. - move |pool| pool.install(f), + move |pool| pool.install(|| f(pool.current_num_threads())), ) .unwrap() })