Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(ast, ast_codegen): optimize AST structs' memory layouts #4404

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2,124 changes: 1,062 additions & 1,062 deletions crates/oxc_ast/src/generated/assert_layouts.rs

Large diffs are not rendered by default.

1,899 changes: 1,899 additions & 0 deletions crates/oxc_ast/src/generated/assert_repr_rust_layout.rs

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion crates/oxc_ast/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(clippy::wildcard_imports)]
#![allow(clippy::inconsistent_struct_constructor)]
// TODO: I'm not sure if it is a bug or intentional, but clippy needs this allowed both on this
// module and the generated one.
#![allow(clippy::self_named_module_files)]
Expand All @@ -24,7 +25,6 @@
//! [`AssignmentTarget`]: ast::AssignmentTarget
//! [`oxc_parser`]: <https://docs.rs/oxc_parser>
//! [`Parser`]: <https://docs.rs/oxc_parser/latest/oxc_parser/struct.Parser.html>
#[cfg(feature = "serialize")]
mod serialize;

Expand All @@ -39,6 +39,8 @@ mod trivia;
mod generated {
#[cfg(test)]
pub mod assert_layouts;
// TODO: remove me; don't merge to upstream!!
pub mod assert_repr_rust_layout;
pub mod ast_builder;
pub mod ast_kind;
pub mod span;
Expand Down
2 changes: 2 additions & 0 deletions crates/oxc_ast_macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ doctest = false
quote = { workspace = true }
syn = { workspace = true, features = ["full"] }
proc-macro2 = { workspace = true }
lazy_static = { workspace = true }
rustc-hash = { workspace = true }
405 changes: 405 additions & 0 deletions crates/oxc_ast_macros/src/generated/ast_field_order_data.rs

Large diffs are not rendered by default.

22 changes: 17 additions & 5 deletions crates/oxc_ast_macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
mod reorder_fields;
mod generated {
pub mod ast_field_order_data;
}

use proc_macro::TokenStream;
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use reorder_fields::reorder_fields;

/// Returns `#[repr(C, u8)]` if `enum_` has any non-unit variant,
/// otherwise returns `#[repr(u8)]`.
Expand All @@ -26,10 +32,16 @@ fn enum_repr(enum_: &syn::ItemEnum) -> TokenStream2 {
#[allow(clippy::missing_panics_doc)]
pub fn ast(_args: TokenStream, input: TokenStream) -> TokenStream {
let input = syn::parse_macro_input!(input as syn::Item);

let repr = match input {
syn::Item::Enum(ref enum_) => enum_repr(enum_),
syn::Item::Struct(_) => quote!(#[repr(C)]),
let (repr, item) = match input {
syn::Item::Enum(enum_) => (enum_repr(&enum_), syn::Item::Enum(enum_)),
syn::Item::Struct(mut struct_) => {
let id = struct_.ident.to_string();
// if we have field ordering data for this type use it to reorder.
if let Some(data) = generated::ast_field_order_data::get(id.as_str()) {
reorder_fields(&mut struct_, data);
};
(quote!(#[repr(C)]), syn::Item::Struct(struct_))
}

_ => {
unreachable!()
Expand All @@ -39,7 +51,7 @@ pub fn ast(_args: TokenStream, input: TokenStream) -> TokenStream {
let expanded = quote! {
#[derive(::oxc_ast_macros::Ast)]
#repr
#input
#item
};
TokenStream::from(expanded)
}
Expand Down
58 changes: 58 additions & 0 deletions crates/oxc_ast_macros/src/reorder_fields.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use proc_macro2::TokenStream;
use quote::format_ident;
use syn::{Field, Fields, FieldsNamed, FieldsUnnamed, ItemStruct};

const DUMMY: &str = "DUMMY";

/// Reorders the fields of `ty` in place according to `data`.
///
/// `data` is a permutation of `0..fields.len()`: the field currently at index `i` is moved to
/// index `data[i]`. Both named and tuple (unnamed) structs are supported. Unit structs have no
/// fields, so they are left untouched.
///
/// # Panics
///
/// Panics if `data` does not describe a permutation of the struct's fields (wrong length, an
/// out-of-range position, or a duplicated position). This indicates that the field order data is
/// out of date with the struct definition.
pub fn reorder_fields(ty: &mut ItemStruct, data: &[u8]) {
    let fields = match &mut ty.fields {
        Fields::Named(FieldsNamed { named: fields, .. })
        | Fields::Unnamed(FieldsUnnamed { unnamed: fields, .. }) => fields,
        Fields::Unit => {
            // Nothing to reorder. Ordering data for a field-less struct can only be stale.
            assert!(
                data.is_empty(),
                "field order data for unit struct `{}` must be empty",
                ty.ident
            );
            return;
        }
    };

    let len = fields.len();
    assert_eq!(
        data.len(),
        len,
        "field order data for `{}` does not match its number of fields",
        ty.ident
    );

    // Validate that `data` is a permutation up front. The cycle walk below relies on it both for
    // in-bounds indexing and for termination.
    let mut seen = vec![false; len];
    for &target in data {
        let target = usize::from(target);
        assert!(
            target < len && !seen[target],
            "field order data for `{}` is not a permutation of its field indices",
            ty.ident
        );
        seen[target] = true;
    }

    // Placeholder that temporarily occupies the slot at the start of the cycle being walked.
    // It is never inspected, so it cannot be confused with a real field (including unnamed
    // fields, or a field that happens to be called `DUMMY`).
    let mut pick = Field {
        attrs: Vec::default(),
        vis: syn::Visibility::Inherited,
        mutability: syn::FieldMutability::None,
        ident: Some(format_ident!("{DUMMY}")),
        colon_token: None,
        ty: syn::Type::Verbatim(TokenStream::default()),
    };

    let mut is_ordered = vec![false; len];
    // Every field is moved exactly once, so the whole walk is linear in the number of fields.
    for start in 0..len {
        if is_ordered[start] {
            continue;
        }

        // Pick up the first unordered field, leaving the placeholder in its slot.
        pick = std::mem::replace(&mut fields[start], pick);
        let mut target = usize::from(data[start]);

        loop {
            // Put the picked field into its target slot and pick up whatever was there.
            pick = std::mem::replace(&mut fields[target], pick);
            is_ordered[target] = true;
            // Arriving back at `start` means we just picked the placeholder up again:
            // the cycle is closed.
            if target == start {
                break;
            }
            target = usize::from(data[target]);
        }
    }
}
90 changes: 90 additions & 0 deletions tasks/ast_codegen/src/generators/ast_field_order.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use itertools::Itertools;
use proc_macro2::TokenStream;
use quote::quote;
use syn::ItemStruct;

use crate::{layout::Layout, output, schema::RType, CodegenCtx, Generator, GeneratorOutput};

use super::{define_generator, generated_header};

// Generator type for the AST field-order table; its `Generator` impl in this file writes
// `ast_field_order_data.rs`.
// NOTE(review): `define_generator!` is defined outside this file; presumably it declares the
// struct together with whatever boilerplate the codegen runner expects — confirm.
define_generator! {
pub struct AstFieldOrder;
}

impl Generator for AstFieldOrder {
    /// Name of this generator.
    fn name(&self) -> &'static str {
        stringify!(AstFieldOrder)
    }

    /// Emits `ast_field_order_data.rs` into the AST macros crate.
    ///
    /// The generated file exposes `get(ident)`, which maps a struct name to its field order
    /// (one table for 64-bit targets and one for 32-bit targets, selected via
    /// `target_pointer_width`). Structs without fields, and structs for which
    /// `generate_orders` yields no data, are left out of the tables.
    fn generate(&mut self, ctx: &CodegenCtx) -> GeneratorOutput {
        // `filter_map` drops the `None`s so that the `#(...),*` repetitions below never emit
        // an empty element between two commas.
        let orders_64 = ctx.ty_table.iter().filter_map(|ty| {
            let RType::Struct(struct_) = &*ty.borrow() else { return None };
            if struct_.item.fields.is_empty() {
                return None;
            }
            generate_orders(&struct_.item, &struct_.meta.layout_64)
        });
        // The 32-bit table must be derived from the 32-bit layout.
        let orders_32 = ctx.ty_table.iter().filter_map(|ty| {
            let RType::Struct(struct_) = &*ty.borrow() else { return None };
            if struct_.item.fields.is_empty() {
                return None;
            }
            generate_orders(&struct_.item, &struct_.meta.layout_32)
        });
        let header = generated_header!();
        GeneratorOutput::Stream((
            output(crate::AST_MACROS_CRATE, "ast_field_order_data.rs"),
            quote! {
                #header
                use lazy_static::lazy_static;
                use rustc_hash::FxHashMap;

                endl!();

                pub fn get(ident: &str) -> Option<&[u8]> {

                    #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
                    std::compile_error!(
                        "Platforms with pointer width other than 64 or 32 bit are not supported"
                    );
                    #[cfg(target_pointer_width = "64")]
                    lazy_static! {
                        static ref DATA: FxHashMap<&'static str, &'static [u8]> =
                            FxHashMap::from_iter([#(#orders_64),*]);
                    }
                    #[cfg(target_pointer_width = "32")]
                    lazy_static! {
                        static ref DATA: FxHashMap<&'static str, &'static [u8]> =
                            FxHashMap::from_iter([#(#orders_32),*]);
                    }


                    DATA.get(ident).copied()
                }
            },
        ))
    }
}

fn generate_orders(ty: &ItemStruct, layout: &Layout) -> Option<TokenStream> {
let ident = &ty.ident.to_string();
let Layout::Layout(layout) = layout else { panic!("Layout isn't determined yet!") };
let offsets = layout.offsets();
if let Some(offsets) = offsets {
let orders = offsets
.iter()
.zip(ty.fields.iter().enumerate())
.sorted_by(|a, b| Ord::cmp(a.0, b.0))
.map(|(_, fi)| fi)
.enumerate()
.sorted_by(|a, b| Ord::cmp(&a.1 .0, &b.1 .0))
.map(|it| {
u8::try_from(it.0).expect("We have no AST type with enough fields to exhaust `u8`.")
});
Some(quote!((#ident, &[#(#orders),*][..])))
} else {
None
}
}
2 changes: 2 additions & 0 deletions tasks/ast_codegen/src/generators/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod assert_layouts;
mod ast_builder;
mod ast_field_order;
mod ast_kind;
mod impl_get_span;
mod visit;
Expand Down Expand Up @@ -42,6 +43,7 @@ pub(crate) use insert;

pub use assert_layouts::AssertLayouts;
pub use ast_builder::AstBuilderGenerator;
pub use ast_field_order::AstFieldOrder;
pub use ast_kind::AstKindGenerator;
pub use impl_get_span::ImplGetSpanGenerator;
pub use visit::{VisitGenerator, VisitMutGenerator};
Expand Down
4 changes: 2 additions & 2 deletions tasks/ast_codegen/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
const AST_CRATE: &str = "crates/oxc_ast";
#[allow(dead_code)]
const AST_MACROS_CRATE: &str = "crates/oxc_ast_macros";

mod defs;
Expand All @@ -22,7 +21,7 @@ use syn::parse_file;

use defs::TypeDef;
use generators::{
AssertLayouts, AstBuilderGenerator, AstKindGenerator, Generator, VisitGenerator,
AssertLayouts, AstBuilderGenerator, AstFieldOrder, AstKindGenerator, Generator, VisitGenerator,
VisitMutGenerator,
};
use schema::{Module, REnum, RStruct, RType, Schema};
Expand Down Expand Up @@ -265,6 +264,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
.pass(CalcLayout)
.pass(BuildSchema)
.gen(AssertLayouts)
.gen(AstFieldOrder)
.gen(AstKindGenerator)
.gen(AstBuilderGenerator)
.gen(ImplGetSpanGenerator)
Expand Down
11 changes: 10 additions & 1 deletion tasks/ast_codegen/src/passes/calc_layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,16 @@ fn calc_struct_layout(ty: &mut RStruct, ctx: &CodegenCtx) -> Result<PlatformLayo
fn with_padding(
layouts: &[KnownLayout],
) -> std::result::Result<KnownLayout, std::alloc::LayoutError> {
let layouts = layouts.iter().enumerate();
// reorder fields
let layouts = layouts.iter().enumerate().sorted_by(|a, b| {
let (ia, a) = a;
let (ib, b) = b;
if b.size() == a.size() {
Ord::cmp(&ia, &ib)
} else {
Ord::cmp(&b.size(), &a.size())
}
});
let mut offsets = vec![0; layouts.len()];
let mut output = std::alloc::Layout::from_size_align(0, 1)?;
let mut niches = 0;
Expand Down
Loading