Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Implement HTML comments and gate behind the annex-b feature #2817

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ trace = []
console = []

# Enable Boa's additional ECMAScript features for web browsers.
annex-b = []
annex-b = ["boa_parser/annex-b"]

[dependencies]
boa_interner.workspace = true
Expand Down
5 changes: 2 additions & 3 deletions boa_engine/src/builtins/function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,9 @@ impl BuiltInFunctionObject {
} else {
let mut parameters = Vec::with_capacity(args.len());
for arg in args {
parameters.push(arg.to_string(context)?.as_slice().to_owned());
parameters.push(arg.to_string(context)?);
}
let mut parameters = parameters.join(utf16!(","));
parameters.push(u16::from(b')'));
let parameters = parameters.join(utf16!(","));

// TODO: make parser generic to u32 iterators
let parameters =
Expand Down
3 changes: 3 additions & 0 deletions boa_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ num-traits = "0.2.15"
bitflags = "2.1.0"
num-bigint = "0.4.3"
regress = "0.5.0"

[features]
annex-b = []
2 changes: 1 addition & 1 deletion boa_parser/src/lexer/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ impl<R> Tokenizer<R> for MultiLineComment {
}
}

///Lexes a first line Hashbang comment
/// Lexes a first line Hashbang comment
///
/// More information:
/// - [ECMAScript reference][spec]
Expand Down
28 changes: 21 additions & 7 deletions boa_parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use std::io::{self, Bytes, Error, ErrorKind, Read};
pub(super) struct Cursor<R> {
iter: InnerIter<R>,
pos: Position,
strict_mode: bool,
module: bool,
strict: bool,
}

impl<R> Cursor<R> {
Expand All @@ -31,13 +32,24 @@ impl<R> Cursor<R> {
}

/// Returns if strict mode is currently active.
pub(super) const fn strict_mode(&self) -> bool {
self.strict_mode
pub(super) const fn strict(&self) -> bool {
self.strict
}

/// Sets the current strict mode.
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
self.strict_mode = strict_mode;
pub(super) fn set_strict(&mut self, strict: bool) {
self.strict = strict;
}

/// Returns if the module mode is currently active.
pub(super) const fn module(&self) -> bool {
self.module
}

/// Sets the current goal symbol to module.
pub(super) fn set_module(&mut self, module: bool) {
self.module = module;
self.strict = module;
}
}

Expand All @@ -50,7 +62,8 @@ where
Self {
iter: InnerIter::new(inner.bytes()),
pos: Position::new(1, 1),
strict_mode: false,
strict: false,
module: false,
}
}

Expand All @@ -59,7 +72,8 @@ where
Self {
iter: InnerIter::new(inner.bytes()),
pos,
strict_mode: false,
strict: false,
module: false,
}
}

Expand Down
112 changes: 88 additions & 24 deletions boa_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,6 @@ pub struct Lexer<R> {
}

impl<R> Lexer<R> {
/// Checks if a character is whitespace as per ECMAScript standards.
///
/// The Rust `char::is_whitespace` function and the ECMAScript standard use different sets of
/// characters as whitespaces:
/// * Rust uses `\p{White_Space}`,
/// * ECMAScript standard uses `\{Space_Separator}` + `\u{0009}`, `\u{000B}`, `\u{000C}`, `\u{FEFF}`
///
/// [More information](https://tc39.es/ecma262/#table-32)
const fn is_whitespace(ch: u32) -> bool {
matches!(
ch,
0x0020 | 0x0009 | 0x000B | 0x000C | 0x00A0 | 0xFEFF |
// Unicode Space_Seperator category (minus \u{0020} and \u{00A0} which are allready stated above)
0x1680 | 0x2000..=0x200A | 0x202F | 0x205F | 0x3000
)
}

/// Sets the goal symbol for the lexer.
pub(crate) fn set_goal(&mut self, elm: InputElement) {
self.goal_symbol = elm;
Expand All @@ -99,13 +82,23 @@ impl<R> Lexer<R> {
}

/// Returns if strict mode is currently active.
pub(super) const fn strict_mode(&self) -> bool {
self.cursor.strict_mode()
pub(super) const fn strict(&self) -> bool {
self.cursor.strict()
}

/// Sets the current strict mode.
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
self.cursor.set_strict_mode(strict_mode);
pub(super) fn set_strict(&mut self, strict: bool) {
self.cursor.set_strict(strict);
}

/// Returns if module mode is currently active.
pub(super) const fn module(&self) -> bool {
self.cursor.module()
}

/// Signals that the goal symbol is a module
pub(super) fn set_module(&mut self, module: bool) {
self.cursor.set_module(module);
}

/// Creates a new lexer.
Expand Down Expand Up @@ -180,14 +173,38 @@ impl<R> Lexer<R> {
}
}

/// Skips an HTML close comment (`-->`) if the `annex-b` feature is enabled.
///
/// Annex B HTML-like comments only apply when the goal symbol is a script,
/// so this is a no-op when the `annex-b` feature is off or when lexing a
/// module. Leading ECMAScript whitespace before the `-->` marker is
/// consumed, and when the marker is found the rest of the line is lexed
/// (and discarded) as a single-line comment.
///
/// # Errors
///
/// Returns an `Err` if reading from the underlying cursor fails.
pub(crate) fn skip_html_close(&mut self, interner: &mut Interner) -> Result<(), Error>
where
R: Read,
{
// HTML close comments are a Script-only Annex B extension; bail out when
// the feature is disabled or the goal symbol is a module.
if !cfg!(feature = "annex-b") || self.module() {
return Ok(());
}

// Consume any ECMAScript whitespace that may precede the `-->` marker.
while self.cursor.peek_char()?.map_or(false, is_whitespace) {
let _next = self.cursor.next_char();
}

// Peek (non-consuming) for the literal `-->` byte sequence.
if self.cursor.peek_n(3)? == [b'-', b'-', b'>'] {
// Consume the three marker bytes: `-`, `-`, `>`.
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();

// Treat the remainder of the line as comment text; lexing it as a
// single-line comment keeps the cursor position consistent.
let start = self.cursor.pos();
SingleLineComment.lex(&mut self.cursor, start, interner)?;
}

Ok(())
}

/// Retrieves the next token from the lexer.
///
/// # Errors
///
/// Will return `Err` on invalid tokens and invalid reads of the bytes being lexed.
// We intentionally don't implement Iterator trait as Result<Option> is cleaner to handle.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self, interner: &mut Interner) -> Result<Option<Token>, Error>
pub(crate) fn next_no_skip(&mut self, interner: &mut Interner) -> Result<Option<Token>, Error>
where
R: Read,
{
Expand All @@ -197,7 +214,7 @@ impl<R> Lexer<R> {
let start = self.cursor.pos();
if let Some(next_ch) = self.cursor.next_char()? {
// Ignore whitespace
if !Self::is_whitespace(next_ch) {
if !is_whitespace(next_ch) {
break (start, next_ch);
}
} else {
Expand Down Expand Up @@ -269,6 +286,14 @@ impl<R> Lexer<R> {
)),
'#' => PrivateIdentifier::new().lex(&mut self.cursor, start, interner),
'/' => self.lex_slash_token(start, interner),
#[cfg(feature = "annex-b")]
'<' if !self.module() && self.cursor.peek_n(3)? == [b'!', b'-', b'-'] => {
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
let start = self.cursor.pos();
SingleLineComment.lex(&mut self.cursor, start, interner)
}
#[allow(clippy::cast_possible_truncation)]
'=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' | '?' => {
Operator::new(next_ch as u8).lex(&mut self.cursor, start, interner)
Expand Down Expand Up @@ -311,6 +336,28 @@ impl<R> Lexer<R> {
}
}

/// Retrieves the next token from the lexer, skipping comments.
///
/// Comment tokens produced by the underlying lexing step are silently
/// discarded; the first non-comment token (or the end of input) is returned.
///
/// # Errors
///
/// Will return `Err` on invalid tokens and invalid reads of the bytes being lexed.
// We intentionally don't implement Iterator trait as Result<Option> is cleaner to handle.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self, interner: &mut Interner) -> Result<Option<Token>, Error>
where
R: Read,
{
loop {
match self.next_no_skip(interner)? {
// Comments carry no syntactic meaning to callers; fetch another token.
Some(token) if token.kind() == &TokenKind::Comment => {}
// Either a real token or the end of the input stream.
other => return Ok(other),
}
}
}

/// Performs the lexing of a template literal.
pub(crate) fn lex_template(
&mut self,
Expand Down Expand Up @@ -339,3 +386,20 @@ impl Default for InputElement {
Self::RegExp
}
}

/// Checks if a character is whitespace as per ECMAScript standards.
///
/// The Rust `char::is_whitespace` function and the ECMAScript standard use different sets of
/// characters as whitespaces:
/// * Rust uses `\p{White_Space}`,
/// * ECMAScript standard uses `\{Space_Separator}` + `\u{0009}`, `\u{000B}`, `\u{000C}`, `\u{FEFF}`
///
/// [More information](https://tc39.es/ecma262/#table-32)
const fn is_whitespace(ch: u32) -> bool {
    matches!(
        ch,
        0x0020 | 0x0009 | 0x000B | 0x000C | 0x00A0 | 0xFEFF |
        // Unicode Space_Separator category (minus \u{0020} and \u{00A0} which are already stated above)
        0x1680 | 0x2000..=0x200A | 0x202F | 0x205F | 0x3000
    )
}
4 changes: 2 additions & 2 deletions boa_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
let ch = char::from(byte);
if ch.is_digit(8) {
// LegacyOctalIntegerLiteral, or a number with leading 0s.
if cursor.strict_mode() {
if cursor.strict() {
// LegacyOctalIntegerLiteral is forbidden with strict mode true.
return Err(Error::syntax(
"implicit octal literals are not allowed in strict mode",
Expand All @@ -278,7 +278,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Indicates a numerical digit comes after then 0 but it isn't an octal digit
// so therefore this must be a number with an unneeded leading 0. This is
// forbidden in strict mode.
if cursor.strict_mode() {
if cursor.strict() {
return Err(Error::syntax(
"leading 0's are not allowed in strict mode",
start_pos,
Expand Down
12 changes: 6 additions & 6 deletions boa_parser/src/lexer/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl<R> Tokenizer<R> for StringLiteral {
let _timer = Profiler::global().start_event("StringLiteral", "Lexing");

let (lit, span, escape_sequence) =
Self::take_string_characters(cursor, start_pos, self.terminator, cursor.strict_mode())?;
Self::take_string_characters(cursor, start_pos, self.terminator, cursor.strict())?;

Ok(Token::new(
TokenKind::string_literal(interner.get_or_intern(&lit[..]), escape_sequence),
Expand All @@ -116,7 +116,7 @@ impl StringLiteral {
cursor: &mut Cursor<R>,
start_pos: Position,
terminator: StringTerminator,
is_strict_mode: bool,
strict: bool,
) -> Result<(Vec<u16>, Span, Option<EscapeSequence>), Error>
where
R: Read,
Expand All @@ -139,7 +139,7 @@ impl StringLiteral {
Self::take_escape_sequence_or_line_continuation(
cursor,
ch_start_pos,
is_strict_mode,
strict,
false,
)?
{
Expand Down Expand Up @@ -167,7 +167,7 @@ impl StringLiteral {
pub(super) fn take_escape_sequence_or_line_continuation<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
is_strict_mode: bool,
strict: bool,
is_template_literal: bool,
) -> Result<Option<(u32, Option<EscapeSequence>)>, Error>
where
Expand Down Expand Up @@ -208,7 +208,7 @@ impl StringLiteral {
"\\8 and \\9 are not allowed in template literal",
start_pos,
));
} else if is_strict_mode {
} else if strict {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in strict mode",
start_pos,
Expand All @@ -224,7 +224,7 @@ impl StringLiteral {
));
}

if is_strict_mode {
if strict {
return Err(Error::syntax(
"octal escape sequences are not allowed in strict mode",
start_pos,
Expand Down
4 changes: 2 additions & 2 deletions boa_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ fn string_legacy_octal_escape() {
for (s, _) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let interner = &mut Interner::default();
lexer.set_strict_mode(true);
lexer.set_strict(true);

if let Error::Syntax(_, pos) = lexer
.next(interner)
Expand Down Expand Up @@ -1096,7 +1096,7 @@ fn string_non_octal_decimal_escape() {
for (s, _) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let interner = &mut Interner::default();
lexer.set_strict_mode(true);
lexer.set_strict(true);

if let Error::Syntax(_, pos) = lexer
.next(interner)
Expand Down
25 changes: 18 additions & 7 deletions boa_parser/src/parser/cursor/buffered_lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,20 @@ where
.map_err(Error::from)
}

pub(super) const fn strict_mode(&self) -> bool {
self.lexer.strict_mode()
pub(super) const fn strict(&self) -> bool {
self.lexer.strict()
}

pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
self.lexer.set_strict_mode(strict_mode);
pub(super) fn set_strict(&mut self, strict: bool) {
self.lexer.set_strict(strict);
}

pub(super) const fn module(&self) -> bool {
self.lexer.module()
}

pub(super) fn set_module(&mut self, module: bool) {
self.lexer.set_module(module);
}

/// Fills the peeking buffer with the next token.
Expand All @@ -124,10 +132,13 @@ where
// We don't want to have multiple contiguous line terminators in the buffer, since
// they have no meaning.
let next = loop {
let next = self.lexer.next(interner)?;
self.lexer.skip_html_close(interner)?;
let next = self.lexer.next_no_skip(interner)?;
if let Some(ref token) = next {
if token.kind() != &TokenKind::LineTerminator {
break next;
match token.kind() {
TokenKind::LineTerminator => { /* skip */ }
TokenKind::Comment => self.lexer.skip_html_close(interner)?,
_ => break next,
}
} else {
break None;
Expand Down
Loading