diff --git a/tasks/ast_tools/src/generators/raw_transfer.rs b/tasks/ast_tools/src/generators/raw_transfer.rs index 41e8fbb4bba3d..fdd949b9887a0 100644 --- a/tasks/ast_tools/src/generators/raw_transfer.rs +++ b/tasks/ast_tools/src/generators/raw_transfer.rs @@ -8,23 +8,16 @@ use proc_macro2::TokenStream; use quote::quote; use rustc_hash::FxHashSet; -use oxc_allocator::{Allocator, CloneIn}; +use oxc_allocator::Allocator; use oxc_ast::{ AstBuilder, NONE, ast::{ - Argument, BindingPatternKind, Expression, FormalParameterKind, FunctionType, - LogicalOperator, ObjectExpression, ObjectPropertyKind, Program, PropertyKind, Statement, - UnaryOperator, + Argument, Expression, FormalParameterKind, FunctionType, LogicalOperator, ObjectExpression, + ObjectPropertyKind, Program, PropertyKind, }, }; -use oxc_ast_visit::{VisitMut, walk_mut}; -use oxc_codegen::Codegen as Printer; -use oxc_minifier::{ - CompressOptions, CompressOptionsKeepNames, Minifier, MinifierOptions, PropertyReadSideEffects, - TreeShakeOptions, -}; -use oxc_parser::Parser; -use oxc_span::{SPAN, SourceType}; +use oxc_ast_visit::VisitMut; +use oxc_span::SPAN; use crate::{ ALLOCATOR_CRATE_PATH, Generator, NAPI_PARSER_PACKAGE_PATH, OXLINT_APP_PATH, @@ -33,7 +26,7 @@ use crate::{ get_fieldless_variant_value, get_struct_field_name, should_flatten_field, should_skip_enum_variant, should_skip_field, }, - output::Output, + output::{Output, javascript::VariantGenerator}, schema::{ BoxDef, CellDef, Def, EnumDef, FieldDef, MetaType, OptionDef, PointerDef, PrimitiveDef, Schema, StructDef, TypeDef, TypeId, VecDef, @@ -129,6 +122,7 @@ impl Generator for RawTransferGenerator { /// /// When printing the JS and TS deserializers, the value of `IS_TS` is set to `true` or `false`, /// and minifier then shakes out the dead code for each. 
+#[expect(clippy::items_after_statements)] fn generate_deserializers( consts: Constants, schema: &Schema, @@ -145,12 +139,6 @@ fn generate_deserializers( let mut code = format!(" let uint8, uint32, float64, sourceText, sourceIsAscii, sourceByteLen; - const IS_TS = false; - const RANGE = false; - const LOC = false; - const PARENT = false; - const PRESERVE_PARENS = false; - const textDecoder = new TextDecoder('utf-8', {{ ignoreBOM: true }}), decodeStr = textDecoder.decode.bind(textDecoder), {{ fromCodePoint }} = String; @@ -215,62 +203,64 @@ fn generate_deserializers( } } - // Parse generated code - let allocator = Allocator::new(); - let source_type = SourceType::mjs(); - let parser_ret = Parser::new(&allocator, &code, source_type).parse(); - assert!(parser_ret.errors.is_empty(), "Parse errors: {:#?}", parser_ret.errors); - let program = parser_ret.program; - - // Create deserializers with various settings, by setting `IS_TS`, `RANGE` and `PRESERVE_PARENS` consts, - // and running through minifier to shake out irrelevant code - let mut print_allocator = Allocator::new(); - let mut deserializers = vec![]; - let mut create_deserializer = |is_ts, range, loc, parent, preserve_parens| { - let mut program = program.clone_in(&print_allocator); - replace_const(&mut program, "IS_TS", is_ts); - replace_const(&mut program, "RANGE", range); - replace_const(&mut program, "LOC", loc); - replace_const(&mut program, "PARENT", parent); - replace_const(&mut program, "PRESERVE_PARENS", preserve_parens); - - if loc { - assert!(range, "`loc` requires `range`"); - LocFieldAdder::new(&allocator).visit_program(&mut program); - } + // Create deserializers with various settings, by setting `IS_TS`, `RANGE`, `LOC`, `PARENT` + // and `PRESERVE_PARENS` consts, and running through minifier to shake out irrelevant code + struct VariantGen { + variant_names: Vec<String>, + } - let code = print_minified(&mut program, &print_allocator); - print_allocator.reset(); + impl VariantGenerator<5> for VariantGen { 
+ const FLAG_NAMES: [&str; 5] = ["IS_TS", "RANGE", "LOC", "PARENT", "PRESERVE_PARENS"]; - let mut name = if is_ts { "ts" } else { "js" }.to_string(); - if range { - name.push_str("_range"); - } - if loc { - name.push_str("_loc"); - } - if parent { - name.push_str("_parent"); - } - if !preserve_parens { - name.push_str("_no_parens"); - } + fn variants(&mut self) -> Vec<[bool; 5]> { + let mut variants = Vec::with_capacity(9); - deserializers.push((name, code)); - }; + for is_ts in [false, true] { + for range in [false, true] { + for parent in [false, true] { + let mut name = if is_ts { "ts" } else { "js" }.to_string(); + if range { + name.push_str("_range"); + } + if parent { + name.push_str("_parent"); + } + self.variant_names.push(name); + + variants.push([ + is_ts, range, /* loc */ false, parent, + /* preserve_parens */ true, + ]); + } + } + } - for is_ts in [false, true] { - for range in [false, true] { - for parent in [false, true] { - create_deserializer(is_ts, range, false, parent, true); + self.variant_names.push("ts_range_loc_parent_no_parens".to_string()); + variants.push([ + /* is_ts */ true, /* range */ true, /* loc */ true, + /* parent */ true, /* preserve_parens */ false, + ]); + + variants + } + + fn pre_process_variant<'a>( + &mut self, + program: &mut Program<'a>, + flags: [bool; 5], + allocator: &'a Allocator, + ) { + if flags[2] { + // `loc` enabled + LocFieldAdder::new(allocator).visit_program(program); } } } - // `PRESERVE_PARENS = false` is only required for linter - create_deserializer(true, true, true, true, false); + let mut generator = VariantGen { variant_names: vec![] }; + let codes = generator.generate(&code); - deserializers + generator.variant_names.into_iter().zip(codes).collect() } /// Type of deserializer in which some code appears. @@ -1283,94 +1273,6 @@ fn get_constants(schema: &Schema) -> Constants { } } -/// Replace the value of a `const` declaration with `true` / `false`. 
-/// -/// Only replaces `const`s defined at top level which are currently defined as a boolean. -fn replace_const(program: &mut Program<'_>, const_name: &str, value: bool) { - for stmt in &mut program.body { - let Statement::VariableDeclaration(var_decl) = stmt else { continue }; - if !var_decl.kind.is_const() { - continue; - } - - for declarator in &mut var_decl.declarations { - if let BindingPatternKind::BindingIdentifier(ident) = &declarator.id.kind - && ident.name == const_name - { - let init = declarator.init.as_mut().unwrap(); - let Expression::BooleanLiteral(bool_lit) = init else { continue }; - bool_lit.value = value; - return; - } - } - } - - panic!("`{const_name}` const not found"); -} - -/// Print AST with minified syntax. -/// -/// Do not remove whitespace, or mangle symbols. -/// Purpose is not to compress length of code, but to remove dead code. -fn print_minified<'a>(program: &mut Program<'a>, allocator: &'a Allocator) -> String { - // Minify - let minify_options = MinifierOptions { - mangle: None, - compress: Some(CompressOptions { - keep_names: CompressOptionsKeepNames::all_true(), - sequences: false, - treeshake: TreeShakeOptions { - property_read_side_effects: PropertyReadSideEffects::None, - ..TreeShakeOptions::default() - }, - ..CompressOptions::default() - }), - }; - Minifier::new(minify_options).minify(allocator, program); - - // Revert minification of `true` to `!0` and `false` to `!1`. It hurts readability. - let mut unminifier = BooleanUnminifier::new(allocator); - unminifier.visit_program(program); - - // Print. Add back line breaks between functions to aid readability. 
- let mut code = Printer::new().build(program).code; - - #[expect(clippy::items_after_statements)] - static RE: Lazy<Regex> = lazy_regex!(r"\n(function|export) "); - code = RE - .replace_all(&code, |caps: &Captures| { - // `format!("\n\n{} ", &caps[1])` would be simpler, but this avoids allocations - if &caps[1] == "function" { "\n\nfunction " } else { "\n\nexport " } - }) - .into_owned(); - - code -} - -/// Visitor which converts `!0` to `true` and `!1` to `false`. -struct BooleanUnminifier<'a> { - ast: AstBuilder<'a>, -} - -impl<'a> BooleanUnminifier<'a> { - fn new(allocator: &'a Allocator) -> Self { - Self { ast: AstBuilder::new(allocator) } - } -} - -impl<'a> VisitMut<'a> for BooleanUnminifier<'a> { - fn visit_expression(&mut self, expr: &mut Expression<'a>) { - if let Expression::UnaryExpression(unary_expr) = expr - && unary_expr.operator == UnaryOperator::LogicalNot - && let Expression::NumericLiteral(lit) = &unary_expr.argument - { - *expr = self.ast.expression_boolean_literal(unary_expr.span, lit.value == 0.0); - return; - } - walk_mut::walk_expression(self, expr); - } -} - /// Visitor to add `loc` field after `range` in all deserialize functions. /// /// Works on AST pre-minification. 
diff --git a/tasks/ast_tools/src/output/javascript.rs b/tasks/ast_tools/src/output/javascript.rs index 00dfedc6e675f..faebf22d70697 100644 --- a/tasks/ast_tools/src/output/javascript.rs +++ b/tasks/ast_tools/src/output/javascript.rs @@ -3,7 +3,23 @@ use std::{ process::{Command, Stdio}, }; -use crate::logln; +use lazy_regex::{Captures, Lazy, Regex, lazy_regex}; + +use oxc_allocator::{Allocator, CloneIn}; +use oxc_ast::{ + AstBuilder, + ast::{Expression, Program, Statement, UnaryOperator}, +}; +use oxc_ast_visit::{VisitMut, walk_mut}; +use oxc_codegen::Codegen; +use oxc_minifier::{ + CompressOptions, CompressOptionsKeepNames, Minifier, MinifierOptions, PropertyReadSideEffects, + TreeShakeOptions, +}; +use oxc_parser::Parser; +use oxc_span::SourceType; + +use crate::{logln, utils::write_it}; use super::add_header; @@ -38,3 +54,248 @@ fn format(source_text: &str) -> String { source_text.to_string() } } + +/// Trait to generate several variants of JS code. +/// +/// `const` statements will be inserted at the top of the file for the flags listed in `FLAG_NAMES`. +/// +/// For each variant, these variables are set to the values provided. +/// Minifier is then run to shake out any dead code. +/// +/// ## Basic example +/// +/// ``` +/// struct Gen; +/// +/// impl VariantGenerator<1> for Gen { // `1` is the number of flags +/// const FLAG_NAMES: [&str; 1] = ["DO_STUFF"]; +/// } +/// +/// // Generate 2 variants with `DO_STUFF = false` and `DO_STUFF = true` +/// let mut variants: Vec<String> = Gen.generate(&code); +/// assert_eq!(variants.len(), 2); +/// let no_stuff = &variants[0]; +/// let with_stuff = &variants[1]; +/// ``` +/// +/// ```js +/// // This is inserted by `VariantGenerator` +/// const DO_STUFF = false; // or `true` +/// +/// // Code given as input to `generate` can use these consts to gate code. +/// // Minifier will remove it from the variants where `DO_STUFF` is `false`. 
+/// if (DO_STUFF) doStuff(); +/// ``` +/// +/// ## Specifying variants +/// +/// By default, `generate` produces variants for every possible combination of flags. +/// To reduce the number of variants, implement `variants` method. +/// +/// ``` +/// struct Gen; +/// +/// impl VariantGenerator<3> for Gen { +/// const FLAG_NAMES: [&str; 3] = ["STUFF", "OTHER_STUFF", "MORE_STUFF"]; +/// +/// /// Only generate 2 variants. +/// fn variants(&mut self) -> Vec<[bool; 3]> { +/// vec![ +/// [/* STUFF */ true, /* OTHER_STUFF */ false, /* MORE_STUFF */ false], +/// [/* STUFF */ false, /* OTHER_STUFF */ true, /* MORE_STUFF */ true], +/// ] +/// } +/// } +/// ``` +/// +/// ## Pre-processing +/// +/// If you need to modify the AST in other ways for certain variants, implement `pre_process_variant` method. +/// +/// `pre_process_variant` is called for each variant, *before* minifier is applied to AST. +/// +/// ``` +/// struct Gen; +/// +/// impl VariantGenerator<2> for Gen { +/// const FLAG_NAMES: [&str; 2] = ["STUFF", "OTHER_STUFF"]; +/// +/// fn pre_process_variant<'a>( +/// &mut self, program: &mut Program<'a>, flags: [bool; 2], allocator: &'a Allocator +/// ) { +/// if /* OTHER_STUFF */ flags[1] { +/// OtherStuffVisitor(allocator).visit_program(program); +/// } +/// } +/// } +/// +/// struct OtherStuffVisitor<'a>(&'a Allocator); +/// +/// impl<'a> VisitMut<'a> for OtherStuffVisitor<'a> { +/// // ... modify AST ... +/// } +/// ``` +pub trait VariantGenerator<const FLAG_COUNT: usize> { + /// Names of flag consts in code. + const FLAG_NAMES: [&str; FLAG_COUNT]; + + /// Get variants required. + /// + /// Return `Vec` of flag values, in same order as `FLAG_NAMES`. + /// + /// By default generates a variant for every possible combination of flags. 
+ fn variants(&mut self) -> Vec<[bool; FLAG_COUNT]> { + let variant_count = 1_usize << FLAG_COUNT; + (0..variant_count) + .map(|variant_index| { + let mut flags = [false; FLAG_COUNT]; + for (flag_index, flag) in flags.iter_mut().enumerate() { + *flag = (variant_index & (1 << flag_index)) != 0; + } + flags + }) + .collect() + } + + /// Perform optional pre-processing on AST of a variant before it's minified. + /// + /// By default, does nothing. + #[expect(unused_variables)] + #[inline] + fn pre_process_variant<'a>( + &mut self, + program: &mut Program<'a>, + flags: [bool; FLAG_COUNT], + allocator: &'a Allocator, + ) { + } + + /// Generate variants. + fn generate(&mut self, code: &str) -> Vec<String> { + // Add `const` statements to top of file + let input_code = code; + let mut code = String::new(); + for flag_name in Self::FLAG_NAMES { + write_it!(code, "const {flag_name} = false;\n"); + } + code.push('\n'); + code.push_str(input_code); + + // Parse + let allocator = Allocator::new(); + let mut program = parse_js(&code, &allocator); + + // Get details of variants. + let mut variants = self.variants(); + + // Generate variants. + // Handle last separately to avoid cloning AST 1 more time than necessary. + let mut print_allocator = Allocator::new(); + let mut outputs = Vec::with_capacity(variants.len()); + + let Some(last_variant) = variants.pop() else { return outputs }; + + for flags in variants { + let mut program = program.clone_in(&print_allocator); + outputs.push(self.generate_variant(&mut program, flags, &print_allocator)); + print_allocator.reset(); + } + + outputs.push(self.generate_variant(&mut program, last_variant, &print_allocator)); + + outputs + } + + /// Generate variants for a set of flags. 
+ fn generate_variant<'a>( + &mut self, + program: &mut Program<'a>, + flags: [bool; FLAG_COUNT], + allocator: &'a Allocator, + ) -> String { + for (stmt_index, value) in flags.into_iter().enumerate() { + let stmt = &mut program.body[stmt_index]; + let Statement::VariableDeclaration(var_decl) = stmt else { unreachable!() }; + let declarator = &mut var_decl.declarations[0]; + let Some(Expression::BooleanLiteral(bool_lit)) = &mut declarator.init else { + unreachable!() + }; + bool_lit.value = value; + } + + self.pre_process_variant(program, flags, allocator); + + print_minified(program, allocator) + } +} + +/// Parse file. +pub fn parse_js<'a>(source_text: &'a str, allocator: &'a Allocator) -> Program<'a> { + let source_type = SourceType::mjs(); + let parser_ret = Parser::new(allocator, source_text, source_type).parse(); + assert!(parser_ret.errors.is_empty(), "Parse errors: {:#?}", parser_ret.errors); + parser_ret.program +} + +/// Print AST with minified syntax. +/// +/// Do not remove whitespace, or mangle symbols. +/// Purpose is not to compress length of code, but to remove dead code. +pub fn print_minified<'a>(program: &mut Program<'a>, allocator: &'a Allocator) -> String { + // Minify + let minify_options = MinifierOptions { + mangle: None, + compress: Some(CompressOptions { + keep_names: CompressOptionsKeepNames::all_true(), + sequences: false, + treeshake: TreeShakeOptions { + property_read_side_effects: PropertyReadSideEffects::None, + ..TreeShakeOptions::default() + }, + ..CompressOptions::default() + }), + }; + Minifier::new(minify_options).minify(allocator, program); + + // Revert minification of `true` to `!0` and `false` to `!1`. It hurts readability. 
+ let mut unminifier = BooleanUnminifier::new(allocator); + unminifier.visit_program(program); + + // Print + let code = Codegen::new().build(program).code; + + // Add back line breaks between function declarations and exports, to aid readability + #[expect(clippy::items_after_statements)] + static REGEX: Lazy<Regex> = lazy_regex!(r"\n(function|export) "); + + REGEX + .replace_all(&code, |caps: &Captures| { + // `format!("\n\n{} ", &caps[1])` would be simpler, but this avoids allocations + if &caps[1] == "function" { "\n\nfunction " } else { "\n\nexport " } + }) + .into_owned() +} + +/// Visitor which converts `!0` to `true` and `!1` to `false`. +struct BooleanUnminifier<'a> { + ast: AstBuilder<'a>, +} + +impl<'a> BooleanUnminifier<'a> { + fn new(allocator: &'a Allocator) -> Self { + Self { ast: AstBuilder::new(allocator) } + } +} + +impl<'a> VisitMut<'a> for BooleanUnminifier<'a> { + fn visit_expression(&mut self, expr: &mut Expression<'a>) { + if let Expression::UnaryExpression(unary_expr) = expr + && unary_expr.operator == UnaryOperator::LogicalNot + && let Expression::NumericLiteral(lit) = &unary_expr.argument + { + *expr = self.ast.expression_boolean_literal(unary_expr.span, lit.value == 0.0); + return; + } + walk_mut::walk_expression(self, expr); + } +} diff --git a/tasks/ast_tools/src/output/mod.rs b/tasks/ast_tools/src/output/mod.rs index e58b0df095a97..cf4bea2fa289d 100644 --- a/tasks/ast_tools/src/output/mod.rs +++ b/tasks/ast_tools/src/output/mod.rs @@ -5,7 +5,7 @@ use proc_macro2::TokenStream; use crate::{log, log_result}; -mod javascript; +pub mod javascript; mod rust; mod yaml; use javascript::print_javascript;