diff --git a/Cargo.lock b/Cargo.lock index 7e98a81c4d8..e67f0118263 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,7 @@ dependencies = [ "boa_ast", "boa_engine", "boa_interner", + "boa_parser", "clap 4.0.18", "colored", "jemallocator", @@ -93,6 +94,7 @@ dependencies = [ "boa_gc", "boa_interner", "boa_macros", + "boa_parser", "boa_profiler", "boa_unicode", "chrono", @@ -135,6 +137,7 @@ dependencies = [ "boa_engine", "boa_gc", "boa_interner", + "boa_parser", "gc", ] @@ -167,6 +170,22 @@ dependencies = [ "syn", ] +[[package]] +name = "boa_parser" +version = "0.16.0" +dependencies = [ + "bitflags", + "boa_ast", + "boa_interner", + "boa_macros", + "boa_profiler", + "boa_unicode", + "fast-float", + "num-bigint", + "num-traits", + "rustc-hash", +] + [[package]] name = "boa_profiler" version = "0.16.0" @@ -184,6 +203,7 @@ dependencies = [ "boa_engine", "boa_gc", "boa_interner", + "boa_parser", "clap 4.0.18", "colored", "fxhash", diff --git a/Cargo.toml b/Cargo.toml index 04c3bad5a5b..1a59dcd481d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "boa_cli", "boa_engine", "boa_ast", + "boa_parser", "boa_gc", "boa_interner", "boa_profiler", @@ -30,6 +31,7 @@ boa_profiler = { version = "0.16.0", path = "boa_profiler" } boa_unicode = { version = "0.16.0", path = "boa_unicode" } boa_macros = { version = "0.16.0", path = "boa_macros" } boa_ast = { version = "0.16.0", path = "boa_ast" } +boa_parser = { version = "0.16.0", path = "boa_parser" } [workspace.metadata.workspaces] allow_branch = "main" diff --git a/boa_cli/Cargo.toml b/boa_cli/Cargo.toml index 3d9963d70e8..c3c15c36e5a 100644 --- a/boa_cli/Cargo.toml +++ b/boa_cli/Cargo.toml @@ -15,6 +15,7 @@ rust-version.workspace = true boa_engine = { workspace = true, features = ["deser", "console"] } boa_ast = { workspace = true, features = ["serde"]} boa_interner.workspace = true +boa_parser.workspace = true rustyline = "10.0.0" rustyline-derive = "0.7.0" clap = { version = "4.0.18", features = ["derive"] } 
diff --git a/boa_cli/src/main.rs b/boa_cli/src/main.rs index b330225aa4f..82da7c5cf8c 100644 --- a/boa_cli/src/main.rs +++ b/boa_cli/src/main.rs @@ -144,11 +144,9 @@ fn parse_tokens(src: S, context: &mut Context) -> Result, { - use boa_engine::syntax::parser::Parser; - let src_bytes = src.as_ref(); - Parser::new(src_bytes) - .parse_all(context) + boa_parser::Parser::new(src_bytes) + .parse_all(context.interner_mut()) .map_err(|e| format!("ParsingError: {e}")) } diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index b5f443da7e0..5c9caeaea85 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -34,6 +34,7 @@ boa_gc.workspace = true boa_profiler.workspace = true boa_macros.workspace = true boa_ast.workspace = true +boa_parser.workspace = true gc = "0.4.1" serde = { version = "1.0.147", features = ["derive", "rc"] } serde_json = "1.0.87" diff --git a/boa_engine/src/builtins/eval/mod.rs b/boa_engine/src/builtins/eval/mod.rs index eb711e0d32c..d23e6146522 100644 --- a/boa_engine/src/builtins/eval/mod.rs +++ b/boa_engine/src/builtins/eval/mod.rs @@ -15,10 +15,13 @@ use crate::{ error::JsNativeError, object::FunctionBuilder, property::Attribute, - Context, JsResult, JsValue, + Context, JsResult, JsString, JsValue, +}; +use boa_ast::operations::{ + contains, contains_arguments, top_level_var_declared_names, ContainsSymbol, }; -use boa_ast::operations::top_level_var_declared_names; use boa_gc::Gc; +use boa_parser::Parser; use boa_profiler::Profiler; #[derive(Debug, Clone, Copy)] @@ -68,6 +71,16 @@ impl Eval { mut strict: bool, context: &mut Context, ) -> JsResult { + #[derive(Debug, Default)] + #[allow(clippy::struct_excessive_bools)] + /// Flags used to throw early errors on invalid `eval` calls. + struct Flags { + in_function: bool, + in_method: bool, + in_derived_constructor: bool, + in_class_field_initializer: bool, + } + /// Possible actions that can be executed after exiting this function to restore the environment to its /// original state. 
enum EnvStackAction { @@ -93,19 +106,82 @@ impl Eval { debug_assert!(direct || !strict); // 2. If Type(x) is not String, return x. - let Some(x) = x.as_string() else { + // TODO: rework parser to take an iterator of `u32` unicode codepoints + let Some(x) = x.as_string().map(JsString::to_std_string_escaped) else { return Ok(x.clone()); }; // Because of implementation details the following code differs from the spec. - // TODO: rework parser to take an iterator of `u32` unicode codepoints - // Parse the script body and handle early errors (6 - 11) - let body = match context.parse_eval(x.to_std_string_escaped().as_bytes(), direct, strict) { - Ok(body) => body, - Err(e) => return Err(JsNativeError::syntax().with_message(e.to_string()).into()), + // 5. Perform ? HostEnsureCanCompileStrings(evalRealm). + let mut parser = Parser::new(x.as_bytes()); + if strict { + parser.set_strict(); + } + // 11. Perform the following substeps in an implementation-defined order, possibly interleaving parsing and error detection: + // a. Let script be ParseText(StringToCodePoints(x), Script). + // b. If script is a List of errors, throw a SyntaxError exception. + // c. If script Contains ScriptBody is false, return undefined. + // d. Let body be the ScriptBody of script. + let body = parser.parse_eval(direct, context.interner_mut())?; + + // 6. Let inFunction be false. + // 7. Let inMethod be false. + // 8. Let inDerivedConstructor be false. + // 9. Let inClassFieldInitializer be false. + // a. Let thisEnvRec be GetThisEnvironment(). + let flags = match context + .realm + .environments + .get_this_environment() + .as_function_slots() + { + // 10. If direct is true, then + // b. If thisEnvRec is a Function Environment Record, then + Some(function_env) if direct => { + let function_env = function_env.borrow(); + // i. Let F be thisEnvRec.[[FunctionObject]]. + let function_object = function_env.function_object().borrow(); + Flags { + // ii. Set inFunction to true. 
+ in_function: true, + // iii. Set inMethod to thisEnvRec.HasSuperBinding(). + in_method: function_env.has_super_binding(), + // iv. If F.[[ConstructorKind]] is derived, set inDerivedConstructor to true. + in_derived_constructor: function_object + .as_function() + .expect("must be function object") + .is_derived_constructor(), + // TODO: + // v. Let classFieldInitializerName be F.[[ClassFieldInitializerName]]. + // vi. If classFieldInitializerName is not empty, set inClassFieldInitializer to true. + in_class_field_initializer: false, + } + } + _ => Flags::default(), }; + if !flags.in_function && contains(&body, ContainsSymbol::NewTarget) { + return Err(JsNativeError::syntax() + .with_message("invalid `new.target` expression inside eval") + .into()); + } + if !flags.in_method && contains(&body, ContainsSymbol::SuperProperty) { + return Err(JsNativeError::syntax() + .with_message("invalid `super` reference inside eval") + .into()); + } + if !flags.in_derived_constructor && contains(&body, ContainsSymbol::SuperCall) { + return Err(JsNativeError::syntax() + .with_message("invalid `super` call inside eval") + .into()); + } + if flags.in_class_field_initializer && contains_arguments(&body) { + return Err(JsNativeError::syntax() + .with_message("invalid `arguments` reference inside eval") + .into()); + } + strict |= body.strict(); // Because our environment model does not map directly to the spec, this section looks very different. 
diff --git a/boa_engine/src/builtins/function/mod.rs b/boa_engine/src/builtins/function/mod.rs index dc8591b2957..5cc70deec57 100644 --- a/boa_engine/src/builtins/function/mod.rs +++ b/boa_engine/src/builtins/function/mod.rs @@ -26,7 +26,6 @@ use crate::{ property::{Attribute, PropertyDescriptor, PropertyKey}, string::utf16, symbol::WellKnownSymbols, - syntax::Parser, value::IntegerOrInfinity, Context, JsResult, JsString, JsValue, }; @@ -37,6 +36,7 @@ use boa_ast::{ }; use boa_gc::{self, custom_trace, Finalize, Gc, Trace}; use boa_interner::Sym; +use boa_parser::Parser; use boa_profiler::Profiler; use dyn_clone::DynClone; use std::{ diff --git a/boa_engine/src/builtins/regexp/mod.rs b/boa_engine/src/builtins/regexp/mod.rs index dd6f9f583de..f8b3ab6b177 100644 --- a/boa_engine/src/builtins/regexp/mod.rs +++ b/boa_engine/src/builtins/regexp/mod.rs @@ -25,10 +25,10 @@ use crate::{ property::{Attribute, PropertyDescriptorBuilder}, string::{utf16, CodePoint}, symbol::WellKnownSymbols, - syntax::lexer::regex::RegExpFlags, value::JsValue, Context, JsResult, JsString, }; +use boa_parser::lexer::regex::RegExpFlags; use boa_profiler::Profiler; use regress::Regex; use std::str::FromStr; diff --git a/boa_engine/src/context/mod.rs b/boa_engine/src/context/mod.rs index f2c97fb751d..f5531cdf0a7 100644 --- a/boa_engine/src/context/mod.rs +++ b/boa_engine/src/context/mod.rs @@ -20,7 +20,6 @@ use crate::{ object::{FunctionBuilder, GlobalPropertyMap, JsObject, ObjectData}, property::{Attribute, PropertyDescriptor, PropertyKey}, realm::Realm, - syntax::{parser::ParseError, Parser}, vm::{CallFrame, CodeBlock, FinallyReturn, GeneratorResumeKind, Vm}, JsResult, JsString, JsValue, }; @@ -28,6 +27,7 @@ use crate::{ use boa_ast::StatementList; use boa_gc::Gc; use boa_interner::{Interner, Sym}; +use boa_parser::{Error as ParseError, Parser}; use boa_profiler::Profiler; #[cfg(feature = "intl")] @@ -162,24 +162,7 @@ impl Context { S: AsRef<[u8]>, { let mut parser = Parser::new(src.as_ref()); - 
parser.parse_all(self) - } - - /// Parse the given source text with eval specific handling. - pub(crate) fn parse_eval( - &mut self, - src: S, - direct: bool, - strict: bool, - ) -> Result - where - S: AsRef<[u8]>, - { - let mut parser = Parser::new(src.as_ref()); - if strict { - parser.set_strict(); - } - parser.parse_eval(direct, self) + parser.parse_all(&mut self.interner) } /// `Call ( F, V [ , argumentsList ] )` @@ -471,7 +454,7 @@ impl Context { { let main_timer = Profiler::global().start_event("Evaluation", "Main"); - let statement_list = Parser::new(src.as_ref()).parse_all(self)?; + let statement_list = Parser::new(src.as_ref()).parse_all(&mut self.interner)?; let code_block = self.compile(&statement_list)?; let result = self.execute(code_block); diff --git a/boa_engine/src/error.rs b/boa_engine/src/error.rs index 07f5ab14ed0..00e124da31f 100644 --- a/boa_engine/src/error.rs +++ b/boa_engine/src/error.rs @@ -5,7 +5,6 @@ use crate::{ object::JsObject, object::ObjectData, property::PropertyDescriptor, - syntax::parser, Context, JsString, JsValue, }; use boa_gc::{Finalize, Trace}; @@ -320,8 +319,8 @@ impl JsError { } } -impl From for JsError { - fn from(err: parser::ParseError) -> Self { +impl From for JsError { + fn from(err: boa_parser::Error) -> Self { Self::from(JsNativeError::from(err)) } } @@ -655,8 +654,8 @@ impl JsNativeError { } } -impl From for JsNativeError { - fn from(err: parser::ParseError) -> Self { +impl From for JsNativeError { + fn from(err: boa_parser::Error) -> Self { Self::syntax().with_message(err.to_string()) } } diff --git a/boa_engine/src/lib.rs b/boa_engine/src/lib.rs index 791f12bb9f3..0a5df842367 100644 --- a/boa_engine/src/lib.rs +++ b/boa_engine/src/lib.rs @@ -89,7 +89,6 @@ pub mod property; pub mod realm; pub mod string; pub mod symbol; -pub mod syntax; pub mod value; pub mod vm; diff --git a/boa_engine/src/syntax/mod.rs b/boa_engine/src/syntax/mod.rs deleted file mode 100644 index 7ced1e404cf..00000000000 --- 
a/boa_engine/src/syntax/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! Syntactical analysis, such as Parsing and Lexing. -// syntax module has a lot of acronyms - -pub mod lexer; -pub mod parser; - -pub use lexer::Lexer; -pub use parser::Parser; diff --git a/boa_examples/Cargo.toml b/boa_examples/Cargo.toml index 931c89374e4..28b1be49752 100644 --- a/boa_examples/Cargo.toml +++ b/boa_examples/Cargo.toml @@ -16,4 +16,5 @@ boa_engine = { workspace = true, features = ["console"] } boa_ast.workspace = true boa_interner.workspace = true boa_gc.workspace = true +boa_parser.workspace = true gc = "0.4.1" diff --git a/boa_examples/src/bin/commuter_visitor.rs b/boa_examples/src/bin/commuter_visitor.rs index c53a1be0fa5..2668b69c209 100644 --- a/boa_examples/src/bin/commuter_visitor.rs +++ b/boa_examples/src/bin/commuter_visitor.rs @@ -10,8 +10,9 @@ use boa_ast::{ visitor::{VisitWith, VisitorMut}, Expression, }; -use boa_engine::{syntax::Parser, Context}; +use boa_engine::Context; use boa_interner::ToInternedString; +use boa_parser::Parser; use core::ops::ControlFlow; use std::{convert::Infallible, fs::File, io::BufReader}; @@ -69,7 +70,7 @@ fn main() { )); let mut ctx = Context::default(); - let mut statements = parser.parse_all(&mut ctx).unwrap(); + let mut statements = parser.parse_all(ctx.interner_mut()).unwrap(); let mut visitor = CommutorVisitor::default(); diff --git a/boa_examples/src/bin/symbol_visitor.rs b/boa_examples/src/bin/symbol_visitor.rs index 8c5c898ff21..a045b4622c3 100644 --- a/boa_examples/src/bin/symbol_visitor.rs +++ b/boa_examples/src/bin/symbol_visitor.rs @@ -3,8 +3,9 @@ // which mutates the AST. 
use boa_ast::visitor::Visitor; -use boa_engine::{syntax::Parser, Context}; +use boa_engine::Context; use boa_interner::Sym; +use boa_parser::Parser; use core::ops::ControlFlow; use std::{collections::HashSet, convert::Infallible, fs::File, io::BufReader}; @@ -28,7 +29,7 @@ fn main() { )); let mut ctx = Context::default(); - let statements = parser.parse_all(&mut ctx).unwrap(); + let statements = parser.parse_all(ctx.interner_mut()).unwrap(); let mut visitor = SymbolVisitor::default(); diff --git a/boa_parser/Cargo.toml b/boa_parser/Cargo.toml new file mode 100644 index 00000000000..a74a0936790 --- /dev/null +++ b/boa_parser/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "boa_parser" +description = "ECMAScript parser for the Boa JavaScript engine." +keywords = ["javascript", "js", "syntax", "parser"] +categories = ["parser-implementations", "compilers"] +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[features] +# serde = ["boa_interner/serde", "dep:serde"] + +[dependencies] +boa_interner.workspace = true +boa_macros.workspace = true +boa_ast.workspace = true +boa_profiler.workspace = true +boa_unicode.workspace = true +rustc-hash = "1.1.0" +fast-float = "0.2.0" +num-traits = "0.2.15" +bitflags = "1.3.2" +num-bigint = "0.4.3" \ No newline at end of file diff --git a/boa_engine/src/syntax/parser/error.rs b/boa_parser/src/error.rs similarity index 88% rename from boa_engine/src/syntax/parser/error.rs rename to boa_parser/src/error.rs index f0ccb28bdc0..648fb2c0a8f 100644 --- a/boa_engine/src/syntax/parser/error.rs +++ b/boa_parser/src/error.rs @@ -1,32 +1,32 @@ //! Error and result implementation for the parser. -use crate::syntax::lexer::Error as LexError; +use crate::lexer::Error as LexError; use boa_ast::{Position, Span}; use std::fmt; /// Result of a parsing operation. 
-pub type ParseResult = Result; +pub type ParseResult = Result; pub(crate) trait ErrorContext { fn context(self, context: &'static str) -> Self; } -impl ErrorContext for Result { +impl ErrorContext for Result { fn context(self, context: &'static str) -> Self { self.map_err(|e| e.context(context)) } } -impl From for ParseError { +impl From for Error { fn from(e: LexError) -> Self { Self::lex(e) } } -/// `ParseError` is an enum which represents errors encounted during parsing an expression +/// An enum which represents errors encountered during parsing an expression #[derive(Debug)] -pub enum ParseError { +pub enum Error { /// When it expected a certain kind of token, but got another as part of something Expected { expected: Box<[String]>, @@ -56,7 +56,7 @@ pub enum ParseError { }, } -impl ParseError { +impl Error { /// Changes the context of the error, if any. fn context(self, new_context: &'static str) -> Self { match self { @@ -71,7 +71,7 @@ impl ParseError { } /// Creates an `Expected` parsing error. - pub(super) fn expected(expected: E, found: F, span: Span, context: &'static str) -> Self + pub(crate) fn expected(expected: E, found: F, span: Span, context: &'static str) -> Self where E: Into>, F: Into>, @@ -85,7 +85,7 @@ impl ParseError { } /// Creates an `Expected` parsing error. - pub(super) fn unexpected(found: F, span: Span, message: C) -> Self + pub(crate) fn unexpected(found: F, span: Span, message: C) -> Self where F: Into>, C: Into>, @@ -103,7 +103,7 @@ impl ParseError { } /// Creates a "general" parsing error with the specific error message for a wrong function declaration in non-strict mode. 
- pub(super) fn wrong_function_declaration_non_strict(position: Position) -> Self { + pub(crate) fn wrong_function_declaration_non_strict(position: Position) -> Self { Self::General { message: "In non-strict mode code, functions can only be declared at top level, inside a block, or as the body of an if statement.", position @@ -112,7 +112,7 @@ impl ParseError { /// Creates a "general" parsing error with the specific error message for a wrong function declaration with label. #[inline] - pub(super) fn wrong_labelled_function_declaration(position: Position) -> Self { + pub(crate) fn wrong_labelled_function_declaration(position: Position) -> Self { Self::General { message: "Labelled functions can only be declared at top level or inside a block", position, @@ -120,18 +120,18 @@ impl ParseError { } /// Creates a parsing error from a lexing error. - pub(super) fn lex(e: LexError) -> Self { + pub(crate) fn lex(e: LexError) -> Self { Self::Lex { err: e } } /// Creates a new `Unimplemented` parsing error. #[allow(dead_code)] - pub(super) fn unimplemented(message: &'static str, position: Position) -> Self { + pub(crate) fn unimplemented(message: &'static str, position: Position) -> Self { Self::Unimplemented { message, position } } } -impl fmt::Display for ParseError { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Expected { diff --git a/boa_engine/src/syntax/lexer/comment.rs b/boa_parser/src/lexer/comment.rs similarity index 98% rename from boa_engine/src/syntax/lexer/comment.rs rename to boa_parser/src/lexer/comment.rs index bc0a1d2f673..127c6932ef3 100644 --- a/boa_engine/src/syntax/lexer/comment.rs +++ b/boa_parser/src/lexer/comment.rs @@ -1,7 +1,7 @@ //! This module implements lexing for comments used in the JavaScript programing language. 
use super::{Cursor, Error, Tokenizer}; -use crate::syntax::lexer::{Token, TokenKind}; +use crate::lexer::{Token, TokenKind}; use boa_ast::{Position, Span}; use boa_interner::Interner; use boa_profiler::Profiler; diff --git a/boa_engine/src/syntax/lexer/cursor.rs b/boa_parser/src/lexer/cursor.rs similarity index 100% rename from boa_engine/src/syntax/lexer/cursor.rs rename to boa_parser/src/lexer/cursor.rs diff --git a/boa_engine/src/syntax/lexer/error.rs b/boa_parser/src/lexer/error.rs similarity index 100% rename from boa_engine/src/syntax/lexer/error.rs rename to boa_parser/src/lexer/error.rs diff --git a/boa_engine/src/syntax/lexer/identifier.rs b/boa_parser/src/lexer/identifier.rs similarity index 98% rename from boa_engine/src/syntax/lexer/identifier.rs rename to boa_parser/src/lexer/identifier.rs index 2520281039d..6a63cc78e35 100644 --- a/boa_engine/src/syntax/lexer/identifier.rs +++ b/boa_parser/src/lexer/identifier.rs @@ -1,7 +1,7 @@ //! This module implements lexing for identifiers (foo, myvar, etc.) used in the JavaScript programing language. use super::{Cursor, Error, Tokenizer}; -use crate::syntax::lexer::{StringLiteral, Token, TokenKind}; +use crate::lexer::{StringLiteral, Token, TokenKind}; use boa_ast::{Keyword, Position, Span}; use boa_interner::Interner; use boa_profiler::Profiler; diff --git a/boa_engine/src/syntax/lexer/mod.rs b/boa_parser/src/lexer/mod.rs similarity index 92% rename from boa_engine/src/syntax/lexer/mod.rs rename to boa_parser/src/lexer/mod.rs index de064198064..3da48f9e926 100644 --- a/boa_engine/src/syntax/lexer/mod.rs +++ b/boa_parser/src/lexer/mod.rs @@ -187,6 +187,10 @@ impl Lexer { } /// Retrieves the next token from the lexer. + /// + /// # Errors + /// + /// Will return `Err` on invalid tokens and invalid reads of the bytes being lexed. // We intentionally don't implement Iterator trait as Result