diff --git a/crates/oxc_ast/src/ast/js.rs b/crates/oxc_ast/src/ast/js.rs index e60ee90000d5da..3172ef702d85c1 100644 --- a/crates/oxc_ast/src/ast/js.rs +++ b/crates/oxc_ast/src/ast/js.rs @@ -1,3 +1,6 @@ +// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file. +// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate. + // Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]` #![allow(non_snake_case)] diff --git a/crates/oxc_ast/src/ast/jsx.rs b/crates/oxc_ast/src/ast/jsx.rs index 300379ba2d55e5..8722901b7c680c 100644 --- a/crates/oxc_ast/src/ast/jsx.rs +++ b/crates/oxc_ast/src/ast/jsx.rs @@ -1,5 +1,8 @@ //! [JSX](https://facebook.github.io/jsx) +// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file. +// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate. + // Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]` #![allow(non_snake_case)] diff --git a/crates/oxc_ast/src/ast/literal.rs b/crates/oxc_ast/src/ast/literal.rs index b7eca21cbe90d5..9d756ebdfcc5fb 100644 --- a/crates/oxc_ast/src/ast/literal.rs +++ b/crates/oxc_ast/src/ast/literal.rs @@ -1,5 +1,8 @@ //! Literals +// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file. +// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate. + // Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]` #![allow(non_snake_case)] diff --git a/crates/oxc_ast/src/ast/ts.rs b/crates/oxc_ast/src/ast/ts.rs index f7499b9d2e103e..3096882fda6588 100644 --- a/crates/oxc_ast/src/ast/ts.rs +++ b/crates/oxc_ast/src/ast/ts.rs @@ -3,6 +3,9 @@ //! [AST Spec](https://github.com/typescript-eslint/typescript-eslint/tree/main/packages/ast-spec) //! [Archived TypeScript spec](https://github.com/microsoft/TypeScript/blob/3c99d50da5a579d9fa92d02664b1b66d4ff55944/doc/spec-ARCHIVED.md) +// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file. +// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate. + // Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]` #![allow(non_snake_case)] diff --git a/crates/oxc_traverse/Cargo.toml b/crates/oxc_traverse/Cargo.toml index c74ca1f02f591e..fbaade960fffde 100644 --- a/crates/oxc_traverse/Cargo.toml +++ b/crates/oxc_traverse/Cargo.toml @@ -15,7 +15,7 @@ categories.workspace = true workspace = true [lib] -doctest = false +doctest = true [dependencies] oxc_allocator = { workspace = true } diff --git a/crates/oxc_traverse/scripts/lib/ancestor.mjs b/crates/oxc_traverse/scripts/lib/ancestor.mjs index c2a1bcf33be2fd..98d7bcb405c1c7 100644 --- a/crates/oxc_traverse/scripts/lib/ancestor.mjs +++ b/crates/oxc_traverse/scripts/lib/ancestor.mjs @@ -123,6 +123,9 @@ export default function generateAncestorsCode(types) { /// /// Encodes both the type of the parent, and child's location in the parent. /// i.e. variants for \`BinaryExpressionLeft\` and \`BinaryExpressionRight\`, not just \`BinaryExpression\`. + // + // SAFETY: This type MUST be \`#[repr(u8)]\` or \`#[repr(u16)]\` (depending on number of variants) + // to maintain the safety of \`TraverseCtx::retag_stack\`. #[repr(C, ${discriminantType})] #[derive(Debug)] pub enum Ancestor<'a> { diff --git a/crates/oxc_traverse/scripts/package.json b/crates/oxc_traverse/scripts/package.json index 54340c04cab54f..50394bcefad29b 100644 --- a/crates/oxc_traverse/scripts/package.json +++ b/crates/oxc_traverse/scripts/package.json @@ -1,4 +1,5 @@ { "name": "oxc_ast_scripts", - "version": "0.0.0" + "version": "0.0.0", + "type": "module" } diff --git a/crates/oxc_traverse/src/ancestor.rs b/crates/oxc_traverse/src/ancestor.rs index c74dd09fa3a112..a34ec5d826e3c4 100644 --- a/crates/oxc_traverse/src/ancestor.rs +++ b/crates/oxc_traverse/src/ancestor.rs @@ -24,6 +24,9 @@ pub(crate) type AncestorDiscriminant = u16; /// /// Encodes both the type of the parent, and child's location in the parent. /// i.e. variants for `BinaryExpressionLeft` and `BinaryExpressionRight`, not just `BinaryExpression`. +// +// SAFETY: This type MUST be `#[repr(u8)]` or `#[repr(u16)]` (depending on number of variants) +// to maintain the safety of `TraverseCtx::retag_stack`. #[repr(C, u16)] #[derive(Debug)] pub enum Ancestor<'a> { diff --git a/crates/oxc_traverse/src/context.rs b/crates/oxc_traverse/src/context.rs index 259d8666f72ff9..5e1c0f342df29e 100644 --- a/crates/oxc_traverse/src/context.rs +++ b/crates/oxc_traverse/src/context.rs @@ -1,4 +1,4 @@ -use oxc_allocator::Allocator; +use oxc_allocator::{Allocator, Box}; use oxc_ast::AstBuilder; use crate::ancestor::{Ancestor, AncestorDiscriminant}; @@ -10,14 +10,28 @@ const INITIAL_STACK_CAPACITY: usize = 64; /// Passed to all AST visitor functions. /// /// Provides ability to: -/// * Query parent/ancestor of current node. -/// * Create AST nodes via `ctx.ast`. -/// * Allocate into arena via `ctx.alloc()`. +/// * Query parent/ancestor of current node via [`parent`], [`ancestor`], [`find_ancestor`]. +/// * Create AST nodes via AST builder [`ast`]. +/// * Allocate into arena via [`alloc`]. +/// +/// [`parent`]: `TraverseCtx::parent` +/// [`ancestor`]: `TraverseCtx::ancestor` +/// [`find_ancestor`]: `TraverseCtx::find_ancestor` +/// [`ast`]: `TraverseCtx::ast` +/// [`alloc`]: `TraverseCtx::alloc` pub struct TraverseCtx<'a> { stack: Vec>, pub ast: AstBuilder<'a>, } +/// Return value when using [`TraverseCtx::find_ancestor`]. +pub enum FinderRet { + Found(T), + Stop, + Continue, +} + +// Public methods impl<'a> TraverseCtx<'a> { /// Create new traversal context. pub fn new(allocator: &'a Allocator) -> Self { @@ -27,9 +41,9 @@ impl<'a> TraverseCtx<'a> { } /// Allocate a node in the arena. - /// Returns a `Box`. + /// Returns a [`Box`]. #[inline] - pub fn alloc(&self, node: T) -> oxc_allocator::Box<'a, T> { + pub fn alloc(&self, node: T) -> Box<'a, T> { self.ast.alloc(node) } @@ -54,7 +68,7 @@ impl<'a> TraverseCtx<'a> { /// /// `finder` should return: /// * `FinderRet::Found(value)` to stop walking and return `Some(value)`. - /// * `FinderRet::Stop` to stop walking and return `None` + /// * `FinderRet::Stop` to stop walking and return `None`. /// * `FinderRet::Continue` to continue walking up. pub fn find_ancestor(&self, finder: F) -> Option where @@ -72,11 +86,17 @@ impl<'a> TraverseCtx<'a> { /// Get depth of ancestry stack. /// i.e. How many nodes above this one in the tree. + /// + /// NB: A "no parent" ancestor above `Program` counts towards this total. + /// The current node does not. #[inline] pub fn ancestors_depth(&self) -> usize { self.stack.len() } +} +// Methods used internally within crate +impl<'a> TraverseCtx<'a> { /// Push item onto stack. #[inline] pub(crate) fn push_stack(&mut self, ancestor: Ancestor<'a>) { @@ -94,10 +114,26 @@ impl<'a> TraverseCtx<'a> { } /// Retag last item on stack. + /// + /// i.e. Alter discriminant of `Ancestor` enum, without changing the "payload" it contains + /// of pointer to the ancestor node. + /// + /// This is purely a performance optimization. If the last item on stack already contains the + /// correct pointer, then `ctx.retag_stack(3)` is equivalent to: + /// + /// ```nocompile + /// ctx.pop_stack(); + /// ctx.push_stack(Ancestor::ProgramBody(ProgramWithoutBody(node_ptr))); + /// ``` + /// + /// (3 is the discriminant for `Ancestor::ProgramBody`) + /// + /// `retag_stack` is only a single 2-byte write operation. + /// /// # SAFETY /// * Stack must not be empty. /// * `discriminant` must be valid discriminant value for `Ancestor` enum. - /// * Last item on stack must contain type corresponding to provided discriminant. + /// * Last item on stack must contain pointer to type corresponding to provided discriminant. #[inline] #[allow(unsafe_code, clippy::ptr_as_ptr, clippy::ref_as_ptr)] pub(crate) unsafe fn retag_stack(&mut self, discriminant: AncestorDiscriminant) { @@ -105,9 +141,3 @@ impl<'a> TraverseCtx<'a> { discriminant; } } - -pub enum FinderRet { - Found(T), - Stop, - Continue, -} diff --git a/crates/oxc_traverse/src/lib.rs b/crates/oxc_traverse/src/lib.rs index 53d6096e5bd4a9..ae32c71f65cf5f 100644 --- a/crates/oxc_traverse/src/lib.rs +++ b/crates/oxc_traverse/src/lib.rs @@ -1,3 +1,65 @@ +//! AST traversal with ability to read up the tree from visitors. +//! +//! Please see [`traverse_mut`] for an explanation of API. +//! +//! # Implementation details +//! +//! Most of the code in this crate is generated by a codegen. +//! Codegen is currently written in JavaScript (`scripts/build.mjs`). +//! +//! Do not edit those files, as they'll be over-written by the build script on next run. +//! +//! The scheme which allows reading up the tree is based on making it statically impossible +//! to violate Rust's aliasing rules. +//! +//! Rust's aliasing rules are (roughly): +//! 1. For any object, you can have as many immutable `&` references simultaneously as you like. +//! 2. For any object, you cannot obtain a mutable `&mut` reference if any other references +//! (immutable or mutable) to that same object exist. +//! 3. A `&`/`&mut` ref covers the object itself and the entire tree below it. +//! i.e. you can't hold a `&mut` to child and any reference to parent at same time (except by +//! "re-borrowing"). +//! +//! This poses a problem for reading back up the tree in a mutating visitor. +//! In a visitor you hold a `&mut` reference to a node, so therefore cannot obtain a `&` ref to +//! its parent, because the parent owns the node. If you had a `&` ref to the parent, that also acts +//! as a `&` ref to the current node = holding `&` and `&mut` refs to same node simultaneously. +//! Disaster! +//! +//! The solution this crate uses is: +//! 1. Don't create references while traversing down the AST in `walk_*` functions. +//! Use raw pointers instead. `&mut` references are only created in the `enter_*` / `exit_*` methods. +//! 2. Don't allow `enter_*` / `exit_*` to access its entire parent or ancestor, only *other branches* +//! of the tree which lead from the parent/ancestor. The parts of the tree above current node +//! which can be accessed via `ctx.parent()` etc **do not overlap** the part of the tree which +//! is available via `&mut` ref inside `enter_*` / `exit_*`. This makes it impossible to obtain +//! 2 references to same AST node at same time. +//! 3. Again, don't create transient references while getting the fields of ancestors. Use raw pointers +//! to go to the field directly, before creating a `&` ref. +//! +//! The mechanism for this is the `Ancestor` enum. Each AST node type has several `Ancestor` variants +//! e.g. `ProgramWithoutDirectives`, `ProgramWithoutHashbang`, `ProgramWithoutBody`. +//! As the names suggest, each of these types grants access to all the fields of `Program` except one. +//! +//! As `walk_*` functions walk down the tree, they add to the stack of `Ancestors` the appropriate type +//! to prevent access to the path which is being walked down. +//! +//! `walk_*` uses `TraverseCtx::retag_stack` to make it as cheap as possible to update the ancestry +//! stack, but this is purely a performance optimization, not essential to the safety of the scheme. +//! +//! # SAFETY +//! This crate contains a great deal of unsafe code. The entirety of `walk.rs` is unsafe functions +//! using raw pointers. +//! +//! To avoid a drain on compile time asking Rust to parse 1000s of `# SAFETY` comments, the codegen-ed +//! files do not contain comments explaining the safety of every unsafe operation. +//! But everything works according to the principles outlined above. +//! +//! Almost all the code is currently codegen-ed. I (@overlookmotel) would recommend continuing to +//! exclusively use a codegen, and not manually editing these files for "special cases". The safety +//! scheme could very easily be derailed entirely by a single mistake, so in my opinion, it's unwise +//! to edit by hand. + use oxc_allocator::Allocator; use oxc_ast::ast::Program; @@ -11,6 +73,69 @@ mod traverse; pub use traverse::Traverse; mod walk; +/// Traverse AST with a [`Traverse`] impl. +/// +/// This allows: +/// 1. Reading and mutating down the tree (i.e. children, children's children, ...) +/// 2. Reading up the tree (i.e. parent, grandparent, ...) +/// +/// `traverser`'s `enter_*` and `exit_*` methods will be called with a `&mut` ref to current AST node +/// and a [`TraverseCtx`] object. +/// +/// [`TraverseCtx`] can be used to access parent or ancestors further up the tree. +/// [`TraverseCtx::parent`] and [`TraverseCtx::ancestor`] return an [`Ancestor`] type. +/// +/// [`Ancestor`] is an enum, whose discriminant encodes both the *type* of the parent, +/// and *location* of the child within the parent. +/// e.g. `Ancestor` has variants for `BinaryExpressionLeft` and `BinaryExpressionRight`, +/// not just `BinaryExpression`. +/// +/// To avoid violating Rust's aliasing rules, you cannot access the property of the parent +/// which current node is the child of. +/// +/// Or, to state the rule more generally: You can read from any branch of the AST *except* +/// the one you are on. +/// +/// A silly analogy: You are a tree surgeon working on pruning an old oak tree. You are sitting +/// on a branch high up in the tree. From that position, you can cut off other branches of the tree +/// no problem, but it would be unwise to saw off the branch that you are sitting on. +/// +/// In practice: For this JS code: +/// +/// ```js +/// x == 1 +/// ``` +/// +/// ``` +/// use oxc_ast::ast::*; +/// use oxc_traverse::{Ancestor, Traverse, TraverseCtx}; +/// +/// struct MyTransform; +/// +/// impl<'a> Traverse<'a> for MyTransform { +/// fn enter_numeric_literal(&mut self, node: &mut NumericLiteral<'a>, ctx: &TraverseCtx<'a>) { +/// // Read parent +/// if let Ancestor::BinaryExpressionRight(bin_expr_ref) = ctx.parent() { +/// // This is legal +/// if let Expression::Identifier(id) = bin_expr_ref.left() { +/// println!("left side is ID: {}", &id.name); +/// } +/// +/// // This would be a compile failure, because the right side is where we came from +/// // dbg!(bin_expr_ref.right()); +/// } +/// +/// // Read grandparent +/// if let Some(Ancestor::ExpressionStatementExpression(stmt_ref)) = ctx.ancestor(2) { +/// // This is legal +/// println!("expression stmt's span: {:?}", stmt_ref.span()); +/// +/// // This would be a compile failure, because the expression is where we came from +/// // dbg!(stmt_ref.expression()); +/// } +/// } +/// } +/// ``` #[allow(unsafe_code)] pub fn traverse_mut<'a, Tr: Traverse<'a>>( traverser: &mut Tr,