Skip to content

Commit

Permalink
Comments and tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed May 5, 2024
1 parent 46cc3a3 commit 0e24b24
Show file tree
Hide file tree
Showing 10 changed files with 190 additions and 16 deletions.
3 changes: 3 additions & 0 deletions crates/oxc_ast/src/ast/js.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file.
// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate.

// Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]`
#![allow(non_snake_case)]

Expand Down
3 changes: 3 additions & 0 deletions crates/oxc_ast/src/ast/jsx.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
//! [JSX](https://facebook.github.io/jsx)
// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file.
// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate.

// Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]`
#![allow(non_snake_case)]

Expand Down
3 changes: 3 additions & 0 deletions crates/oxc_ast/src/ast/literal.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
//! Literals
// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file.
// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate.

// Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]`
#![allow(non_snake_case)]

Expand Down
3 changes: 3 additions & 0 deletions crates/oxc_ast/src/ast/ts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
//! [AST Spec](https://github.com/typescript-eslint/typescript-eslint/tree/main/packages/ast-spec)
//! [Archived TypeScript spec](https://github.com/microsoft/TypeScript/blob/3c99d50da5a579d9fa92d02664b1b66d4ff55944/doc/spec-ARCHIVED.md)
// NB: `#[visited_node]` attribute on AST nodes does not do anything to the code in this file.
// It is purely a marker for codegen used in `oxc_traverse`. See docs in that crate.

// Silence erroneous warnings from Rust Analyser for `#[derive(Tsify)]`
#![allow(non_snake_case)]

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_traverse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ categories.workspace = true
workspace = true

[lib]
doctest = false
doctest = true

[dependencies]
oxc_allocator = { workspace = true }
Expand Down
3 changes: 3 additions & 0 deletions crates/oxc_traverse/scripts/lib/ancestor.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ export default function generateAncestorsCode(types) {
///
/// Encodes both the type of the parent, and child's location in the parent.
/// i.e. variants for \`BinaryExpressionLeft\` and \`BinaryExpressionRight\`, not just \`BinaryExpression\`.
//
// SAFETY: This type MUST be \`#[repr(u8)]\` or \`#[repr(u16)]\` (depending on number of variants)
// to maintain the safety of \`TraverseCtx::retag_stack\`.
#[repr(C, ${discriminantType})]
#[derive(Debug)]
pub enum Ancestor<'a> {
Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_traverse/scripts/package.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "oxc_ast_scripts",
"version": "0.0.0"
"version": "0.0.0",
"type": "module"
}
3 changes: 3 additions & 0 deletions crates/oxc_traverse/src/ancestor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ pub(crate) type AncestorDiscriminant = u16;
///
/// Encodes both the type of the parent, and child's location in the parent.
/// i.e. variants for `BinaryExpressionLeft` and `BinaryExpressionRight`, not just `BinaryExpression`.
//
// SAFETY: This type MUST be `#[repr(u8)]` or `#[repr(u16)]` (depending on number of variants)
// to maintain the safety of `TraverseCtx::retag_stack`.
#[repr(C, u16)]
#[derive(Debug)]
pub enum Ancestor<'a> {
Expand Down
58 changes: 44 additions & 14 deletions crates/oxc_traverse/src/context.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use oxc_allocator::Allocator;
use oxc_allocator::{Allocator, Box};
use oxc_ast::AstBuilder;

use crate::ancestor::{Ancestor, AncestorDiscriminant};
Expand All @@ -10,14 +10,28 @@ const INITIAL_STACK_CAPACITY: usize = 64;
/// Passed to all AST visitor functions.
///
/// Provides ability to:
/// * Query parent/ancestor of current node.
/// * Create AST nodes via `ctx.ast`.
/// * Allocate into arena via `ctx.alloc()`.
/// * Query parent/ancestor of current node via [`parent`], [`ancestor`], [`find_ancestor`].
/// * Create AST nodes via AST builder [`ast`].
/// * Allocate into arena via [`alloc`].
///
/// [`parent`]: `TraverseCtx::parent`
/// [`ancestor`]: `TraverseCtx::ancestor`
/// [`find_ancestor`]: `TraverseCtx::find_ancestor`
/// [`ast`]: `TraverseCtx::ast`
/// [`alloc`]: `TraverseCtx::alloc`
pub struct TraverseCtx<'a> {
stack: Vec<Ancestor<'a>>,
pub ast: AstBuilder<'a>,
}

/// Return value when using [`TraverseCtx::find_ancestor`].
pub enum FinderRet<T> {
Found(T),
Stop,
Continue,
}

// Public methods
impl<'a> TraverseCtx<'a> {
/// Create new traversal context.
pub fn new(allocator: &'a Allocator) -> Self {
Expand All @@ -27,9 +41,9 @@ impl<'a> TraverseCtx<'a> {
}

/// Allocate a node in the arena.
/// Returns a `Box<T>`.
/// Returns a [`Box<T>`].
#[inline]
pub fn alloc<T>(&self, node: T) -> oxc_allocator::Box<'a, T> {
pub fn alloc<T>(&self, node: T) -> Box<'a, T> {
self.ast.alloc(node)
}

Expand All @@ -54,7 +68,7 @@ impl<'a> TraverseCtx<'a> {
///
/// `finder` should return:
/// * `FinderRet::Found(value)` to stop walking and return `Some(value)`.
/// * `FinderRet::Stop` to stop walking and return `None`
/// * `FinderRet::Stop` to stop walking and return `None`.
/// * `FinderRet::Continue` to continue walking up.
pub fn find_ancestor<F, O>(&self, finder: F) -> Option<O>
where
Expand All @@ -72,11 +86,17 @@ impl<'a> TraverseCtx<'a> {

/// Get depth of ancestry stack.
/// i.e. How many nodes above this one in the tree.
///
/// NB: A "no parent" ancestor above `Program` counts towards this total.
/// The current node does not.
#[inline]
pub fn ancestors_depth(&self) -> usize {
self.stack.len()
}
}

// Methods used internally within crate
impl<'a> TraverseCtx<'a> {
/// Push item onto stack.
#[inline]
pub(crate) fn push_stack(&mut self, ancestor: Ancestor<'a>) {
Expand All @@ -94,20 +114,30 @@ impl<'a> TraverseCtx<'a> {
}

/// Retag last item on stack.
///
/// i.e. Alter discriminant of `Ancestor` enum, without changing the "payload" it contains
/// of pointer to the ancestor node.
///
/// This is purely a performance optimization. If the last item on stack already contains the
/// correct pointer, then `ctx.retag_stack(3)` is equivalent to:
///
/// ```nocompile
/// ctx.pop_stack();
/// ctx.push_stack(Ancestor::ProgramBody(ProgramWithoutBody(node_ptr)));
/// ```
///
/// (3 is the discriminant for `Ancestor::ProgramBody`)
///
/// `retag_stack` is only a single 2-byte write operation.
///
/// # SAFETY
/// * Stack must not be empty.
/// * `discriminant` must be valid discriminant value for `Ancestor` enum.
/// * Last item on stack must contain type corresponding to provided discriminant.
/// * Last item on stack must contain pointer to type corresponding to provided discriminant.
#[inline]
#[allow(unsafe_code, clippy::ptr_as_ptr, clippy::ref_as_ptr)]
pub(crate) unsafe fn retag_stack(&mut self, discriminant: AncestorDiscriminant) {
*(self.stack.last_mut().unwrap_unchecked() as *mut _ as *mut AncestorDiscriminant) =
discriminant;
}
}

pub enum FinderRet<T> {
Found(T),
Stop,
Continue,
}
125 changes: 125 additions & 0 deletions crates/oxc_traverse/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,65 @@
//! AST traversal with ability to read up the tree from visitors.
//!
//! Please see [`traverse_mut`] for an explanation of API.
//!
//! # Implementation details
//!
//! Most of the code in this crate is generated by a codegen.
//! Codegen is currently written in JavaScript (`scripts/build.mjs`).
//!
//! Do not edit those files, as they'll be over-written by the build script on next run.
//!
//! The scheme which allows reading up the tree is based on making it statically impossible
//! to violate Rust's aliasing rules.
//!
//! Rust's aliasing rules are (roughly):
//! 1. For any object, you can have as many immutable `&` references simultaneously as you like.
//! 2. For any object, you cannot obtain a mutable `&mut` reference if any other references
//! (immutable or mutable) to that same object exist.
//! 3. A `&`/`&mut` ref covers the object itself and the entire tree below it.
//! i.e. you can't hold a `&mut` to child and any reference to parent at same time (except by
//! "re-borrowing").
//!
//! This poses a problem for reading back up the tree in a mutating visitor.
//! In a visitor you hold a `&mut` reference to a node, so therefore cannot obtain a `&` ref to
//! its parent, because the parent owns the node. If you had a `&` ref to the parent, that also acts
//! as a `&` ref to the current node = holding `&` and `&mut` refs to same node simultaneously.
//! Disaster!
//!
//! The solution this crate uses is:
//! 1. Don't create references while traversing down the AST in `walk_*` functions.
//! Use raw pointers instead. `&mut` references are only created in the `enter_*` / `exit_*` methods.
//! 2. Don't allow `enter_*` / `exit_*` to access its entire parent or ancestor, only *other branches*
//! of the tree which lead from the parent/ancestor. The parts of the tree above current node
//! which can be accessed via `ctx.parent()` etc **do not overlap** the part of the tree which
//! is available via `&mut` ref inside `enter_*` / `exit_*`. This makes it impossible to obtain
//! 2 references to same AST node at same time.
//! 3. Again, don't create transient references while getting the fields of ancestors. Use raw pointers
//! to go to the field directly, before creating a `&` ref.
//!
//! The mechanism for this is the `Ancestor` enum. Each AST node type has several `Ancestor` variants
//! e.g. `ProgramWithoutDirectives`, `ProgramWithoutHashbang`, `ProgramWithoutBody`.
//! As the names suggest, each of these types grants access to all the fields of `Program` except one.
//!
//! As `walk_*` functions walk down the tree, they add to the stack of `Ancestors` the appropriate type
//! to prevent access to the path which is being walked down.
//!
//! `walk_*` uses `TraverseCtx::retag_stack` to make it as cheap as possible to update the ancestry
//! stack, but this is purely a performance optimization, not essential to the safety of the scheme.
//!
//! # SAFETY
//! This crate contains a great deal of unsafe code. The entirety of `walk.rs` is unsafe functions
//! using raw pointers.
//!
//! To avoid a drain on compile time asking Rust to parse 1000s of `# SAFETY` comments, the codegen-ed
//! files do not contain comments explaining the safety of every unsafe operation.
//! But everything works according to the principles outlined above.
//!
//! Almost all the code is currently codegen-ed. I (@overlookmotel) would recommend continuing to
//! exclusively use a codegen, and not manually editing these files for "special cases". The safety
//! scheme could very easily be derailed entirely by a single mistake, so in my opinion, it's unwise
//! to edit by hand.
use oxc_allocator::Allocator;

use oxc_ast::ast::Program;
Expand All @@ -11,6 +73,69 @@ mod traverse;
pub use traverse::Traverse;
mod walk;

/// Traverse AST with a [`Traverse`] impl.
///
/// This allows:
/// 1. Reading and mutating down the tree (i.e. children, children's children, ...)
/// 2. Reading up the tree (i.e. parent, grandparent, ...)
///
/// `traverser`'s `enter_*` and `exit_*` methods will be called with a `&mut` ref to current AST node
/// and a [`TraverseCtx`] object.
///
/// [`TraverseCtx`] can be used to access parent or ancestors further up the tree.
/// [`TraverseCtx::parent`] and [`TraverseCtx::ancestor`] return an [`Ancestor`] type.
///
/// [`Ancestor`] is an enum, whose discriminant encodes both the *type* of the parent,
/// and *location* of the child within the parent.
/// e.g. `Ancestor` has variants for `BinaryExpressionLeft` and `BinaryExpressionRight`,
/// not just `BinaryExpression`.
///
/// To avoid violating Rust's aliasing rules, you cannot access the property of the parent
/// which current node is the child of.
///
/// Or, to state the rule more generally: You can read from any branch of the AST *except*
/// the one you are on.
///
/// A silly analogy: You are a tree surgeon working on pruning an old oak tree. You are sitting
/// on a branch high up in the tree. From that position, you can cut off other branches of the tree
/// no problem, but it would be unwise to saw off the branch that you are sitting on.
///
/// In practice: For this JS code:
///
/// ```js
/// x == 1
/// ```
///
/// ```
/// use oxc_ast::ast::*;
/// use oxc_traverse::{Ancestor, Traverse, TraverseCtx};
///
/// struct MyTransform;
///
/// impl<'a> Traverse<'a> for MyTransform {
/// fn enter_numeric_literal(&mut self, node: &mut NumericLiteral<'a>, ctx: &TraverseCtx<'a>) {
/// // Read parent
/// if let Ancestor::BinaryExpressionRight(bin_expr_ref) = ctx.parent() {
/// // This is legal
/// if let Expression::Identifier(id) = bin_expr_ref.left() {
/// println!("left side is ID: {}", &id.name);
/// }
///
/// // This would be a compile failure, because the right side is where we came from
/// // dbg!(bin_expr_ref.right());
/// }
///
/// // Read grandparent
/// if let Some(Ancestor::ExpressionStatementExpression(stmt_ref)) = ctx.ancestor(2) {
/// // This is legal
/// println!("expression stmt's span: {:?}", stmt_ref.span());
///
/// // This would be a compile failure, because the expression is where we came from
/// // dbg!(stmt_ref.expression());
/// }
/// }
/// }
/// ```
#[allow(unsafe_code)]
pub fn traverse_mut<'a, Tr: Traverse<'a>>(
traverser: &mut Tr,
Expand Down

0 comments on commit 0e24b24

Please sign in to comment.