diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index 3e17dbc5733..57a6a07585b 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -40,7 +40,7 @@ trace = [] console = [] # Enable Boa's additional ECMAScript features for web browsers. -annex-b = [] +annex-b = ["boa_parser/annex-b"] [dependencies] boa_interner.workspace = true diff --git a/boa_engine/src/builtins/function/mod.rs b/boa_engine/src/builtins/function/mod.rs index f204deed8c6..2d30ffcf05f 100644 --- a/boa_engine/src/builtins/function/mod.rs +++ b/boa_engine/src/builtins/function/mod.rs @@ -614,10 +614,9 @@ impl BuiltInFunctionObject { } else { let mut parameters = Vec::with_capacity(args.len()); for arg in args { - parameters.push(arg.to_string(context)?.as_slice().to_owned()); + parameters.push(arg.to_string(context)?); } - let mut parameters = parameters.join(utf16!(",")); - parameters.push(u16::from(b')')); + let parameters = parameters.join(utf16!(",")); // TODO: make parser generic to u32 iterators let parameters = diff --git a/boa_parser/Cargo.toml b/boa_parser/Cargo.toml index ed3257bbe9e..d7592e3cc75 100644 --- a/boa_parser/Cargo.toml +++ b/boa_parser/Cargo.toml @@ -22,3 +22,6 @@ num-traits = "0.2.15" bitflags = "2.1.0" num-bigint = "0.4.3" regress = "0.5.0" + +[features] +annex-b = [] diff --git a/boa_parser/src/lexer/comment.rs b/boa_parser/src/lexer/comment.rs index abd9633855d..6b23e19ed7e 100644 --- a/boa_parser/src/lexer/comment.rs +++ b/boa_parser/src/lexer/comment.rs @@ -98,7 +98,7 @@ impl Tokenizer for MultiLineComment { } } -///Lexes a first line Hashbang comment +/// Lexes a first line Hashbang comment /// /// More information: /// - [ECMAScript reference][spec] diff --git a/boa_parser/src/lexer/cursor.rs b/boa_parser/src/lexer/cursor.rs index 06ec3dc4666..0e63c741f3c 100644 --- a/boa_parser/src/lexer/cursor.rs +++ b/boa_parser/src/lexer/cursor.rs @@ -8,7 +8,8 @@ use std::io::{self, Bytes, Error, ErrorKind, Read}; pub(super) struct Cursor { iter: InnerIter, pos: 
Position, - strict_mode: bool, + module: bool, + strict: bool, } impl Cursor { @@ -31,13 +32,24 @@ impl Cursor { } /// Returns if strict mode is currently active. - pub(super) const fn strict_mode(&self) -> bool { - self.strict_mode + pub(super) const fn strict(&self) -> bool { + self.strict } /// Sets the current strict mode. - pub(super) fn set_strict_mode(&mut self, strict_mode: bool) { - self.strict_mode = strict_mode; + pub(super) fn set_strict(&mut self, strict: bool) { + self.strict = strict; + } + + /// Returns if the module mode is currently active. + pub(super) const fn module(&self) -> bool { + self.module + } + + /// Sets whether the goal symbol is a module, and sets strict mode to the same value. + pub(super) fn set_module(&mut self, module: bool) { + self.module = module; + self.strict = module; } } @@ -50,7 +62,8 @@ where Self { iter: InnerIter::new(inner.bytes()), pos: Position::new(1, 1), - strict_mode: false, + strict: false, + module: false, } } @@ -59,7 +72,8 @@ where Self { iter: InnerIter::new(inner.bytes()), pos, - strict_mode: false, + strict: false, + module: false, } } diff --git a/boa_parser/src/lexer/mod.rs b/boa_parser/src/lexer/mod.rs index 9e838141018..818c89c8e30 100644 --- a/boa_parser/src/lexer/mod.rs +++ b/boa_parser/src/lexer/mod.rs @@ -71,23 +71,6 @@ pub struct Lexer { } impl Lexer { - /// Checks if a character is whitespace as per ECMAScript standards. 
- /// - /// The Rust `char::is_whitespace` function and the ECMAScript standard use different sets of - /// characters as whitespaces: - /// * Rust uses `\p{White_Space}`, - /// * ECMAScript standard uses `\{Space_Separator}` + `\u{0009}`, `\u{000B}`, `\u{000C}`, `\u{FEFF}` - /// - /// [More information](https://tc39.es/ecma262/#table-32) - const fn is_whitespace(ch: u32) -> bool { - matches!( - ch, - 0x0020 | 0x0009 | 0x000B | 0x000C | 0x00A0 | 0xFEFF | - // Unicode Space_Seperator category (minus \u{0020} and \u{00A0} which are allready stated above) - 0x1680 | 0x2000..=0x200A | 0x202F | 0x205F | 0x3000 - ) - } - /// Sets the goal symbol for the lexer. pub(crate) fn set_goal(&mut self, elm: InputElement) { self.goal_symbol = elm; @@ -99,13 +82,23 @@ impl Lexer { } /// Returns if strict mode is currently active. - pub(super) const fn strict_mode(&self) -> bool { - self.cursor.strict_mode() + pub(super) const fn strict(&self) -> bool { + self.cursor.strict() } /// Sets the current strict mode. - pub(super) fn set_strict_mode(&mut self, strict_mode: bool) { - self.cursor.set_strict_mode(strict_mode); + pub(super) fn set_strict(&mut self, strict: bool) { + self.cursor.set_strict(strict); + } + + /// Returns if module mode is currently active. + pub(super) const fn module(&self) -> bool { + self.cursor.module() + } + + /// Sets whether the goal symbol is a module. + pub(super) fn set_module(&mut self, module: bool) { + self.cursor.set_module(module); } /// Creates a new lexer. @@ -180,14 +173,38 @@ impl Lexer { } } + /// Skips an HTML close comment (`-->`) if the `annex-b` feature is enabled and module mode is not active. + pub(crate) fn skip_html_close(&mut self, interner: &mut Interner) -> Result<(), Error> + where + R: Read, + { + if !cfg!(feature = "annex-b") || self.module() { + return Ok(()); + } + + while self.cursor.peek_char()?.map_or(false, is_whitespace) { + let _next = self.cursor.next_char(); + } + + if self.cursor.peek_n(3)? 
== [b'-', b'-', b'>'] { + let _next = self.cursor.next_byte(); + let _next = self.cursor.next_byte(); + let _next = self.cursor.next_byte(); + + let start = self.cursor.pos(); + SingleLineComment.lex(&mut self.cursor, start, interner)?; + } + + Ok(()) + } + /// Retrieves the next token from the lexer. /// /// # Errors /// /// Will return `Err` on invalid tokens and invalid reads of the bytes being lexed. // We intentionally don't implement Iterator trait as Result