Skip to content

Commit

Permalink
perf(semantic): avoid counting nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Sep 13, 2024
1 parent b4ed564 commit 9145fcc
Show file tree
Hide file tree
Showing 22 changed files with 228 additions and 55 deletions.
17 changes: 12 additions & 5 deletions crates/oxc/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,13 @@ pub trait CompilerInterface {
/* Compress */

if let Some(options) = self.compress_options() {
self.compress(&allocator, &mut program, options);
self.compress(&allocator, &mut program, source_text, options);
}

/* Mangler */

let mangler = self.mangle_options().map(|options| self.mangle(&mut program, options));
let mangler =
self.mangle_options().map(|options| self.mangle(&mut program, source_text, options));

/* Codegen */

Expand Down Expand Up @@ -214,13 +215,19 @@ pub trait CompilerInterface {
&self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
options: CompressOptions,
) {
Compressor::new(allocator, options).build(program);
Compressor::new(allocator, options).build(program, source_text);
}

fn mangle(&self, program: &mut Program<'_>, options: MangleOptions) -> Mangler {
Mangler::new().with_options(options).build(program)
fn mangle(
&self,
program: &mut Program<'_>,
source_text: &str,
options: MangleOptions,
) -> Mangler {
Mangler::new().with_options(options).build(program, source_text)
}

fn codegen<'a>(
Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_mangler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ impl Mangler {
}

#[must_use]
pub fn build<'a>(mut self, program: &'a Program<'a>) -> Mangler {
let semantic = SemanticBuilder::new("").build(program).semantic;
pub fn build<'a>(mut self, program: &'a Program<'a>, source_text: &str) -> Mangler {
let semantic = SemanticBuilder::new(source_text).build(program).semantic;

// Mangle the symbol table by computing slots from the scope tree.
// A slot is the occurrence index of a binding identifier inside a scope.
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/mangler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn mangler(source_text: &str, source_type: SourceType, debug: bool) -> String {
let allocator = Allocator::default();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(program).source_text
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/minifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn minify(source_text: &str, source_type: SourceType, mangle: bool) -> String {
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let options = MinifierOptions { mangle, compress: CompressOptions::all_true() };
let ret = Minifier::new(options).build(&allocator, program);
let ret = Minifier::new(options).build(&allocator, program, source_text);
CodeGenerator::new().with_mangler(ret.mangler).build(program).source_text
}
8 changes: 5 additions & 3 deletions crates/oxc_minifier/src/compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ impl<'a> Compressor<'a> {
Self { allocator, options }
}

pub fn build(self, program: &mut Program<'a>) {
let (symbols, scopes) =
SemanticBuilder::new("").build(program).semantic.into_symbol_table_and_scope_tree();
pub fn build(self, program: &mut Program<'a>, source_text: &str) {
let (symbols, scopes) = SemanticBuilder::new(source_text)
.build(program)
.semantic
.into_symbol_table_and_scope_tree();
self.build_with_symbols_and_scopes(symbols, scopes, program);
}

Expand Down
11 changes: 8 additions & 3 deletions crates/oxc_minifier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,14 @@ impl Minifier {
Self { options }
}

pub fn build<'a>(self, allocator: &'a Allocator, program: &mut Program<'a>) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program);
let mangler = self.options.mangle.then(|| Mangler::default().build(program));
pub fn build<'a>(
self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program, source_text);
let mangler = self.options.mangle.then(|| Mangler::default().build(program, source_text));
MinifierReturn { mangler }
}
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mangler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn mangle(source_text: &str) -> String {
let source_type = SourceType::mjs();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = ret.program;
let mangler = Mangler::new().build(&program);
let mangler = Mangler::new().build(&program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(&program).source_text
}

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn run(source_text: &str, source_type: SourceType, options: Option<CompressOptio
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
if let Some(options) = options {
Compressor::new(&allocator, options).build(program);
Compressor::new(&allocator, options).build(program, source_text);
}
CodeGenerator::new()
.with_options(CodegenOptions { single_quote: true, ..CodegenOptions::default() })
Expand Down
19 changes: 9 additions & 10 deletions crates/oxc_semantic/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
binder::Binder,
checker,
class::ClassTableBuilder,
counter::Counts,
counts::Counts,
diagnostics::redeclaration,
jsdoc::JSDocBuilder,
label::UnusedLabels,
Expand Down Expand Up @@ -221,18 +221,16 @@ impl<'a> SemanticBuilder<'a> {
let scope_id = self.scope.add_scope(None, NodeId::DUMMY, ScopeFlags::Top);
program.scope_id.set(Some(scope_id));
} else {
// Count the number of nodes, scopes, symbols, and references.
// Estimate the number of nodes, scopes, symbols, and references.
// Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree`
// and `SymbolTable` to store them.
// This means that as we traverse the AST and fill up these structures with data,
// they never need to grow and reallocate - which is an expensive operation as it
// involves copying all the memory from the old allocation to the new one.
// For large source files, these structures are very large, so growth is very costly
// as it involves copying massive chunks of memory.
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let counts = Counts::count(program);
// Avoiding this growth produces up to 40% perf boost on our benchmarks.
let counts = Counts::count(program, self.source_text);
self.nodes.reserve(counts.nodes);
self.scope.reserve(counts.scopes);
self.symbols.reserve(counts.symbols, counts.references);
Expand All @@ -243,11 +241,12 @@ impl<'a> SemanticBuilder<'a> {
// Check that estimated counts accurately
#[cfg(debug_assertions)]
{
#[allow(clippy::cast_possible_truncation)]
let actual_counts = Counts {
nodes: self.nodes.len(),
scopes: self.scope.len(),
symbols: self.symbols.len(),
references: self.symbols.references.len(),
nodes: self.nodes.len() as u32,
scopes: self.scope.len() as u32,
symbols: self.symbols.len() as u32,
references: self.symbols.references.len() as u32,
};
Counts::assert_accurate(&actual_counts, &counts);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
//! Visitor to count nodes, scopes, symbols and references in AST.
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! Produces an accurate count, by visiting AST and counting these items.
//!
//! Doing a full traverse of AST has a sizeable performance cost, but is necessary on platforms
//! which are 32-bit or do not have virtual memory (e.g. WASM) and so the "standard" version of
//! `Counts` is not suitable.

use std::cell::Cell;

Expand All @@ -13,38 +17,37 @@ use oxc_ast::{
};
use oxc_syntax::scope::{ScopeFlags, ScopeId};

use super::assert_le;

#[derive(Default, Debug)]
pub(crate) struct Counts {
pub nodes: usize,
pub scopes: usize,
pub symbols: usize,
pub references: usize,
pub struct Counts {
pub nodes: u32,
pub scopes: u32,
pub symbols: u32,
pub references: u32,
}

impl Counts {
pub fn count(program: &Program) -> Self {
let mut counts = Counts::default();
/// Calculate counts by visiting AST
pub fn count(program: &Program, _source_text: &str) -> Self {
let mut counts = Self::default();
counts.visit_program(program);
counts
}

/// Assert that estimated counts were accurate
#[cfg_attr(not(debug_assertions), expect(dead_code))]
pub fn assert_accurate(actual: &Self, estimated: &Self) {
assert_eq!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_eq!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_eq!(actual.references, estimated.references, "references count mismatch");
// `Counts` may overestimate number of symbols, because multiple `BindingIdentifier`s
// can result in only a single symbol.
// e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
// This is not a big problem - allocating a `Vec` with excess capacity is cheap.
// This is not a big problem - allocating a `Vec` with excess capacity is fairly cheap.
// It's allocating with *not enough* capacity which is costly, as then the `Vec`
// will grow and reallocate.
assert!(
actual.symbols <= estimated.symbols,
"symbols count mismatch {} <= {}",
actual.symbols,
estimated.symbols
);
assert_le!(actual.symbols, estimated.symbols, "symbols count mismatch");
assert_eq!(actual.references, estimated.references, "references count mismatch");
}
}

Expand Down
54 changes: 54 additions & 0 deletions crates/oxc_semantic/src/counts/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable`, to store data for all these items.
//! If sufficient capacity is not reserved in advance, these structures can grow and reallocate
//! during their construction, which involves copying large chunks of memory.
//! This produces a large performance cost - around 30% on our benchmarks for large source files.
//!
//! `Counts` has 2 implementations.
//! * `standard` - preferred version, for 64-bit platforms with virtual memory.
//! * `visitor` - fallback version, for 32-bit platforms, and platforms with no virtual memory (e.g. WASM).
//!
//! Please see docs in each module for the differences between them.

#[cfg(all(target_pointer_width = "64", not(target_arch = "wasm32")))]
mod standard;
#[cfg(all(target_pointer_width = "64", not(target_arch = "wasm32")))]
pub use standard::Counts;

#[cfg(not(all(target_pointer_width = "64", not(target_arch = "wasm32"))))]
mod fallback;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "wasm32"))))]
pub use fallback::Counts;

/// Macro to assert that `left <= right`
macro_rules! assert_le {
($left:expr, $right:expr, $($msg_args:tt)+) => {
match (&$left, &$right) {
(left, right) => if !(left <= right) {
panic!(
"assertion failed: `(left <= right)`\n left: `{:?}`,\n right: `{:?}`: {}",
left, right,
::std::format_args!($($msg_args)+),
);
}
}
};

($left:expr, $right:expr) => {
match (&$left, &$right) {
(left, right) => if !(left <= right) {
panic!(
"assertion failed: `(left <= right)`\n left: `{:?}`,\n right: `{:?}`",
left, right,
);
}
}
};

($lhs:expr, $rhs:expr,) => {
assert_le!($lhs, $rhs);
};
}
use assert_le;
83 changes: 83 additions & 0 deletions crates/oxc_semantic/src/counts/standard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! Estimate counts based on size of source text.
//!
//! These will almost always be a large over-estimate, but will never be an under-estimate.
//! Not under-estimating is the most important thing, as the `Vec`s in `AstNodes`, `ScopeTree`
//! and `SymbolTable` will not need to resize during building `Semantic`, which avoids expensive
//! memory copying.
//!
//! This implementation of `Counts` can be used on 64-bit platforms with virtual memory, where it's
//! not a big problem to reserve excessively large blocks of virtual memory, because:
//! 1. The memory space is so large that it's almost impossible to exhaust.
//! 2. Allocating memory only consumes virtual memory, not physical memory.
//!
//! Note: Ideally, we'd shrink the allocations to fit once the actual size required is known.
//! But found that shrinking caused memory to be reallocated, which had a large perf cost
//! (~10% on semantic benchmarks).

use std::cmp::max;

use oxc_ast::ast::Program;

use super::assert_le;

#[derive(Default, Debug)]
pub struct Counts {
pub nodes: u32,
pub scopes: u32,
pub symbols: u32,
pub references: u32,
}

impl Counts {
/// Calculate counts as probable over-estimates based on size of source text
pub fn count(_program: &Program, source_text: &str) -> Self {
#[allow(clippy::cast_possible_truncation)]
let source_len = source_text.len() as u32;

// Calculate maximum number of nodes, scopes, symbols and references that's possible
// for given length of source code.
// These will almost always be a large over-estimate, but will never be an under-estimate.

// The most node-intensive code is:
// `` = 0 bytes, 1 nodes
// `x` = 1 bytes, 3 nodes
// `x=x` = 3 bytes, 7 nodes
// `x=x=x` = 5 bytes, 11 nodes
#[allow(clippy::cast_lossless)]
let nodes = u32::try_from(source_len as u64 * 2 + 1).unwrap_or(u32::MAX);

// The most scope-intensive code is:
// `` = 0 bytes, 1 scopes
// `{}` = 2 bytes, 2 scopes
// `{{}}` = 4 bytes, 3 scopes
// `{{{}}}` = 6 bytes, 4 scopes
let scopes = source_len / 2 + 1;

// The most symbol-intensive code is:
// `` = 0 bytes, 0 symbols
// `a=>0` = 4 bytes, 1 symbols
// `(a,a)=>0` = 8 bytes, 2 symbols
// `var a` = 5 bytes, 1 symbols
// `var a,a` = 7 bytes, 2 symbols
let symbols = max(source_len / 2, 1) - 1;

// The most reference-intensive code is:
// `a` = 1 bytes, 1 references
// `a,a` = 3 bytes, 2 references
// `a,a,a` = 5 bytes, 3 references
let references = source_len / 2 + 1;

Self { nodes, scopes, symbols, references }
}

/// Assert that estimated counts were not an under-estimate
#[cfg_attr(not(debug_assertions), expect(dead_code))]
pub fn assert_accurate(actual: &Self, estimated: &Self) {
assert_le!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_le!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_le!(actual.symbols, estimated.symbols, "symbols count mismatch");
assert_le!(actual.references, estimated.references, "references count mismatch");
}
}
2 changes: 1 addition & 1 deletion crates/oxc_semantic/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ mod binder;
mod builder;
mod checker;
mod class;
mod counter;
mod counts;
mod diagnostics;
mod jsdoc;
mod label;
Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_semantic/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ impl<'a> AstNodes<'a> {
ast_node_id
}

pub fn reserve(&mut self, additional: usize) {
pub fn reserve(&mut self, additional: u32) {
let additional = additional as usize;
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}
Expand Down
Loading

0 comments on commit 9145fcc

Please sign in to comment.