Lexer reserved keyword identifier strict mode lexing
Paul Lancaster committed Sep 26, 2020
1 parent b3389b7 commit 0108b80
Showing 10 changed files with 93 additions and 13 deletions.
14 changes: 12 additions & 2 deletions boa/src/syntax/lexer/comment.rs
@@ -23,7 +23,12 @@ use std::io::Read;
 pub(super) struct SingleLineComment;
 
 impl<R> Tokenizer<R> for SingleLineComment {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
@@ -58,7 +63,12 @@ impl<R> Tokenizer<R> for SingleLineComment {
 pub(super) struct MultiLineComment;
 
 impl<R> Tokenizer<R> for MultiLineComment {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
31 changes: 30 additions & 1 deletion boa/src/syntax/lexer/identifier.rs
@@ -10,6 +10,20 @@ use crate::{
 };
 use std::io::Read;
 
+const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [
+    "eval",
+    "arguments",
+    "implements",
+    "interface",
+    "let",
+    "package",
+    "private",
+    "protected",
+    "public",
+    "static",
+    "yield",
+];
+
 /// Identifier lexing.
 ///
 /// More information:
@@ -31,7 +45,12 @@ impl Identifier {
 }
 
 impl<R> Tokenizer<R> for Identifier {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
@@ -51,6 +70,16 @@ impl<R> Tokenizer<R> for Identifier {
         if let Ok(keyword) = slice.parse() {
            TokenKind::Keyword(keyword)
         } else {
+            if strict_mode && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {
+                return Err(Error::Syntax(
+                    format!(
+                        "using future reserved keyword '{}' not allowed in strict mode",
+                        slice
+                    )
+                    .into(),
+                    start_pos,
+                ));
+            }
             TokenKind::identifier(slice)
         }
     }
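This hunk is the heart of the commit: in strict code, identifiers matching one of the entries above are rejected at lex time. For illustration, here is the same guard in isolation — a minimal, self-contained sketch, not Boa's API (the check_identifier helper and the plain String error are hypothetical stand-ins for Tokenizer::lex, which returns Error::Syntax carrying a source Position):

const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [
    "eval", "arguments", "implements", "interface", "let", "package",
    "private", "protected", "public", "static", "yield",
];

// Hypothetical standalone version of the lexer's new strict-mode guard.
fn check_identifier(slice: &str, strict_mode: bool) -> Result<(), String> {
    if strict_mode && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {
        return Err(format!(
            "using future reserved keyword '{}' not allowed in strict mode",
            slice
        ));
    }
    Ok(())
}

fn main() {
    // "interface" is an ordinary identifier outside strict mode...
    assert!(check_identifier("interface", false).is_ok());
    // ...but is rejected as a future reserved word under strict mode.
    assert!(check_identifier("interface", true).is_err());
}

Note that the parser does not yet set the flag: per the TODO in boa/src/syntax/parser/cursor/buffered_lexer/mod.rs below, strict_mode is currently always false, so this error path is not reachable from normal parsing yet.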
13 changes: 11 additions & 2 deletions boa/src/syntax/lexer/mod.rs
@@ -48,7 +48,12 @@ pub use token::{Token, TokenKind};
 
 trait Tokenizer<R> {
     /// Lexes the next token.
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read;
 }
@@ -109,7 +114,11 @@ impl<R> Lexer<R> {
     // that means it could be multiple different tokens depending on the input token.
     //
     // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
-    pub(crate) fn lex_slash_token(&mut self, start: Position, strict_mode: bool) -> Result<Token, Error>
+    pub(crate) fn lex_slash_token(
+        &mut self,
+        start: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
9 changes: 7 additions & 2 deletions boa/src/syntax/lexer/number.rs
@@ -23,7 +23,7 @@ use std::{io::Read, str::FromStr};
 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Number_type
 #[derive(Debug, Clone, Copy)]
 pub(super) struct NumberLiteral {
-    init: char
+    init: char,
 }
 
 impl NumberLiteral {
@@ -134,7 +134,12 @@
 }
 
 impl<R> Tokenizer<R> for NumberLiteral {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/operator.rs
@@ -93,7 +93,12 @@ impl Operator {
 }
 
 impl<R> Tokenizer<R> for Operator {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/regex.rs
@@ -33,7 +33,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 pub(super) struct RegexLiteral;
 
 impl<R> Tokenizer<R> for RegexLiteral {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/spread.rs
@@ -31,7 +31,12 @@ impl SpreadLiteral {
 }
 
 impl<R> Tokenizer<R> for SpreadLiteral {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/string.rs
@@ -51,7 +51,12 @@ enum StringTerminator {
 }
 
 impl<R> Tokenizer<R> for StringLiteral {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/template.rs
@@ -24,7 +24,12 @@ use std::io::{self, ErrorKind, Read};
 pub(super) struct TemplateLiteral;
 
 impl<R> Tokenizer<R> for TemplateLiteral {
-    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position, strict_mode: bool) -> Result<Token, Error>
+    fn lex(
+        &mut self,
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        strict_mode: bool,
+    ) -> Result<Token, Error>
     where
         R: Read,
     {
4 changes: 3 additions & 1 deletion boa/src/syntax/parser/cursor/buffered_lexer/mod.rs
@@ -81,7 +81,9 @@ where
         self.set_goal(InputElement::RegExp);
 
         let strict_mode: bool = false; // TODO enable setting strict mode on/off.
-        self.lexer.lex_slash_token(start, strict_mode).map_err(|e| e.into())
+        self.lexer
+            .lex_slash_token(start, strict_mode)
+            .map_err(|e| e.into())
     }
 
     /// Fills the peeking buffer with the next token.
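Because strict_mode is hard-coded to false here, the new identifier check stays dormant until the parser learns to recognize strict code. A minimal, self-contained sketch of the kind of directive-prologue detection the TODO implies — purely hypothetical, not part of this commit or of Boa's parser, which would operate on tokens rather than raw source text:

// Hypothetical sketch: spotting a "use strict" directive prologue so a parser
// could flip the strict_mode flag it passes down to the lexer. A real
// implementation would inspect the first statements as tokens and handle
// multiple directives, escapes, and comments.
fn has_use_strict_directive(source: &str) -> bool {
    let trimmed = source.trim_start();
    trimmed.starts_with("\"use strict\"") || trimmed.starts_with("'use strict'")
}

fn main() {
    assert!(has_use_strict_directive("'use strict';\nlet x = 1;"));
    assert!(!has_use_strict_directive("let x = 1;"));
}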
