From b3389b7d6724cb7ea5b9e25af88ccc54e8a84547 Mon Sep 17 00:00:00 2001 From: Paul Lancaster Date: Sat, 26 Sep 2020 11:00:15 +0100 Subject: [PATCH 1/3] Add strict mode to lex trait --- boa/src/syntax/lexer/comment.rs | 4 ++-- boa/src/syntax/lexer/identifier.rs | 2 +- boa/src/syntax/lexer/mod.rs | 24 +++++++++---------- boa/src/syntax/lexer/number.rs | 13 +++++----- boa/src/syntax/lexer/operator.rs | 2 +- boa/src/syntax/lexer/regex.rs | 2 +- boa/src/syntax/lexer/spread.rs | 2 +- boa/src/syntax/lexer/string.rs | 2 +- boa/src/syntax/lexer/template.rs | 2 +- .../parser/cursor/buffered_lexer/mod.rs | 4 +++- 10 files changed, 29 insertions(+), 28 deletions(-) diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs index cedd084ca88..0d50b6294cb 100644 --- a/boa/src/syntax/lexer/comment.rs +++ b/boa/src/syntax/lexer/comment.rs @@ -23,7 +23,7 @@ use std::io::Read; pub(super) struct SingleLineComment; impl Tokenizer for SingleLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { @@ -58,7 +58,7 @@ impl Tokenizer for SingleLineComment { pub(super) struct MultiLineComment; impl Tokenizer for MultiLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs index 15dfecbb7cd..c3d70c01155 100644 --- a/boa/src/syntax/lexer/identifier.rs +++ b/boa/src/syntax/lexer/identifier.rs @@ -31,7 +31,7 @@ impl Identifier { } impl Tokenizer for Identifier { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index 4fd8f75c3ab..937733352d7 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -48,7 +48,7 @@ pub use token::{Token, TokenKind}; trait Tokenizer { /// Lexes the next token. - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read; } @@ -109,7 +109,7 @@ impl Lexer { // that means it could be multiple different tokens depending on the input token. // // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar - pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result + pub(crate) fn lex_slash_token(&mut self, start: Position, strict_mode: bool) -> Result where R: Read, { @@ -119,11 +119,11 @@ impl Lexer { match c { '/' => { self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/' - SingleLineComment.lex(&mut self.cursor, start) + SingleLineComment.lex(&mut self.cursor, start, strict_mode) } '*' => { self.cursor.next_char()?.expect("* token vanished"); // Consume the '*' - MultiLineComment.lex(&mut self.cursor, start) + MultiLineComment.lex(&mut self.cursor, start, strict_mode) } ch => { match self.get_goal() { @@ -146,7 +146,7 @@ impl Lexer { } InputElement::RegExp | InputElement::RegExpOrTemplateTail => { // Can be a regular expression. - RegexLiteral.lex(&mut self.cursor, start) + RegexLiteral.lex(&mut self.cursor, start, strict_mode) } } } @@ -188,13 +188,13 @@ impl Lexer { TokenKind::LineTerminator, Span::new(start, self.cursor.pos()), )), - '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start), - '`' => TemplateLiteral.lex(&mut self.cursor, start), + '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode), + '`' => TemplateLiteral.lex(&mut self.cursor, start, strict_mode), _ if next_chr.is_digit(10) => { - NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start) + NumberLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode) } _ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => { - Identifier::new(next_chr).lex(&mut self.cursor, start) + Identifier::new(next_chr).lex(&mut self.cursor, start, strict_mode) } ';' => Ok(Token::new( Punctuator::Semicolon.into(), @@ -204,7 +204,7 @@ impl Lexer { Punctuator::Colon.into(), Span::new(start, self.cursor.pos()), )), - '.' => SpreadLiteral::new().lex(&mut self.cursor, start), + '.' => SpreadLiteral::new().lex(&mut self.cursor, start, strict_mode), '(' => Ok(Token::new( Punctuator::OpenParen.into(), Span::new(start, self.cursor.pos()), @@ -237,9 +237,9 @@ impl Lexer { Punctuator::Question.into(), Span::new(start, self.cursor.pos()), )), - '/' => self.lex_slash_token(start), + '/' => self.lex_slash_token(start, strict_mode), '=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' => { - Operator::new(next_chr).lex(&mut self.cursor, start) + Operator::new(next_chr).lex(&mut self.cursor, start, strict_mode) } _ => { let details = format!( diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs index 8391a3ee12f..e4cb5a71f59 100644 --- a/boa/src/syntax/lexer/number.rs +++ b/boa/src/syntax/lexer/number.rs @@ -23,14 +23,13 @@ use std::{io::Read, str::FromStr}; /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Number_type #[derive(Debug, Clone, Copy)] pub(super) struct NumberLiteral { - init: char, - strict_mode: bool, + init: char } impl NumberLiteral { /// Creates a new string literal lexer. - pub(super) fn new(init: char, strict_mode: bool) -> Self { - Self { init, strict_mode } + pub(super) fn new(init: char) -> Self { + Self { init } } } @@ -135,7 +134,7 @@ where } impl Tokenizer for NumberLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { @@ -187,7 +186,7 @@ impl Tokenizer for NumberLiteral { ch => { if ch.is_digit(8) { // LegacyOctalIntegerLiteral - if self.strict_mode { + if strict_mode { // LegacyOctalIntegerLiteral is forbidden with strict mode true. return Err(Error::syntax( "implicit octal literals are not allowed in strict mode", @@ -205,7 +204,7 @@ impl Tokenizer for NumberLiteral { // Indicates a numerical digit comes after then 0 but it isn't an octal digit // so therefore this must be a number with an unneeded leading 0. This is // forbidden in strict mode. - if self.strict_mode { + if strict_mode { return Err(Error::syntax( "leading 0's are not allowed in strict mode", start_pos, diff --git a/boa/src/syntax/lexer/operator.rs b/boa/src/syntax/lexer/operator.rs index 5aa72c7d559..052039f3f10 100644 --- a/boa/src/syntax/lexer/operator.rs +++ b/boa/src/syntax/lexer/operator.rs @@ -93,7 +93,7 @@ impl Operator { } impl Tokenizer for Operator { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/regex.rs b/boa/src/syntax/lexer/regex.rs index 2367c44d70f..472cd26d6c3 100644 --- a/boa/src/syntax/lexer/regex.rs +++ b/boa/src/syntax/lexer/regex.rs @@ -33,7 +33,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; pub(super) struct RegexLiteral; impl Tokenizer for RegexLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/spread.rs b/boa/src/syntax/lexer/spread.rs index cc8e0ad36f9..83a2581c521 100644 --- a/boa/src/syntax/lexer/spread.rs +++ b/boa/src/syntax/lexer/spread.rs @@ -31,7 +31,7 @@ impl SpreadLiteral { } impl Tokenizer for SpreadLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 87b999664c8..30e79299c5c 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -51,7 +51,7 @@ enum StringTerminator { } impl Tokenizer for StringLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index c51763c7f33..72b5c678ee2 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -24,7 +24,7 @@ use std::io::{self, ErrorKind, Read}; pub(super) struct TemplateLiteral; impl Tokenizer for TemplateLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs index bed01da9ef0..f7dce89e38a 100644 --- a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs +++ b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs @@ -79,7 +79,9 @@ where pub(super) fn lex_regex(&mut self, start: Position) -> Result { let _timer = BoaProfiler::global().start_event("cursor::lex_regex()", "Parsing"); self.set_goal(InputElement::RegExp); - self.lexer.lex_slash_token(start).map_err(|e| e.into()) + + let strict_mode: bool = false; // TODO enable setting strict mode on/off. + self.lexer.lex_slash_token(start, strict_mode).map_err(|e| e.into()) } /// Fills the peeking buffer with the next token. From 0108b8009ca27d50b7911c72520ac11a8755f159 Mon Sep 17 00:00:00 2001 From: Paul Lancaster Date: Sat, 26 Sep 2020 11:22:52 +0100 Subject: [PATCH 2/3] Lexer reserved keyword identifier strict mode lexing --- boa/src/syntax/lexer/comment.rs | 14 +++++++-- boa/src/syntax/lexer/identifier.rs | 31 ++++++++++++++++++- boa/src/syntax/lexer/mod.rs | 13 ++++++-- boa/src/syntax/lexer/number.rs | 9 ++++-- boa/src/syntax/lexer/operator.rs | 7 ++++- boa/src/syntax/lexer/regex.rs | 7 ++++- boa/src/syntax/lexer/spread.rs | 7 ++++- boa/src/syntax/lexer/string.rs | 7 ++++- boa/src/syntax/lexer/template.rs | 7 ++++- .../parser/cursor/buffered_lexer/mod.rs | 4 ++- 10 files changed, 93 insertions(+), 13 deletions(-) diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs index 0d50b6294cb..9d3a3155813 100644 --- a/boa/src/syntax/lexer/comment.rs +++ b/boa/src/syntax/lexer/comment.rs @@ -23,7 +23,12 @@ use std::io::Read; pub(super) struct SingleLineComment; impl Tokenizer for SingleLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { @@ -58,7 +63,12 @@ impl Tokenizer for SingleLineComment { pub(super) struct MultiLineComment; impl Tokenizer for MultiLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs index c3d70c01155..0d8081afbf8 100644 --- a/boa/src/syntax/lexer/identifier.rs +++ b/boa/src/syntax/lexer/identifier.rs @@ -10,6 +10,20 @@ use crate::{ }; use std::io::Read; +const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [ + "eval", + "arguments", + "implements", + "interface", + "let", + "package", + "private", + "protected", + "public", + "static", + "yield", +]; + /// Identifier lexing. /// /// More information: @@ -31,7 +45,12 @@ impl Identifier { } impl Tokenizer for Identifier { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { @@ -51,6 +70,16 @@ impl Tokenizer for Identifier { if let Ok(keyword) = slice.parse() { TokenKind::Keyword(keyword) } else { + if strict_mode && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) { + return Err(Error::Syntax( + format!( + "using future reserved keyword '{}' not allowed in strict mode", + slice + ) + .into(), + start_pos, + )); + } TokenKind::identifier(slice) } } diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index 937733352d7..689b684f06a 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -48,7 +48,12 @@ pub use token::{Token, TokenKind}; trait Tokenizer { /// Lexes the next token. - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read; } @@ -109,7 +114,11 @@ impl Lexer { // that means it could be multiple different tokens depending on the input token. // // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar - pub(crate) fn lex_slash_token(&mut self, start: Position, strict_mode: bool) -> Result + pub(crate) fn lex_slash_token( + &mut self, + start: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs index e4cb5a71f59..1cce22256b4 100644 --- a/boa/src/syntax/lexer/number.rs +++ b/boa/src/syntax/lexer/number.rs @@ -23,7 +23,7 @@ use std::{io::Read, str::FromStr}; /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Number_type #[derive(Debug, Clone, Copy)] pub(super) struct NumberLiteral { - init: char + init: char, } impl NumberLiteral { @@ -134,7 +134,12 @@ where } impl Tokenizer for NumberLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/operator.rs b/boa/src/syntax/lexer/operator.rs index 052039f3f10..322d8f53be4 100644 --- a/boa/src/syntax/lexer/operator.rs +++ b/boa/src/syntax/lexer/operator.rs @@ -93,7 +93,12 @@ impl Operator { } impl Tokenizer for Operator { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/regex.rs b/boa/src/syntax/lexer/regex.rs index 472cd26d6c3..f6c3ecf5a51 100644 --- a/boa/src/syntax/lexer/regex.rs +++ b/boa/src/syntax/lexer/regex.rs @@ -33,7 +33,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; pub(super) struct RegexLiteral; impl Tokenizer for RegexLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/spread.rs b/boa/src/syntax/lexer/spread.rs index 83a2581c521..56647be3c8d 100644 --- a/boa/src/syntax/lexer/spread.rs +++ b/boa/src/syntax/lexer/spread.rs @@ -31,7 +31,12 @@ impl SpreadLiteral { } impl Tokenizer for SpreadLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 30e79299c5c..51c97ef5dc3 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -51,7 +51,12 @@ enum StringTerminator { } impl Tokenizer for StringLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index 72b5c678ee2..61e94d37ab5 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -24,7 +24,12 @@ use std::io::{self, ErrorKind, Read}; pub(super) struct TemplateLiteral; impl Tokenizer for TemplateLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result + fn lex( + &mut self, + cursor: &mut Cursor, + start_pos: Position, + strict_mode: bool, + ) -> Result where R: Read, { diff --git a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs index f7dce89e38a..407e58be129 100644 --- a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs +++ b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs @@ -81,7 +81,9 @@ where self.set_goal(InputElement::RegExp); let strict_mode: bool = false; // TODO enable setting strict mode on/off. - self.lexer.lex_slash_token(start, strict_mode).map_err(|e| e.into()) + self.lexer + .lex_slash_token(start, strict_mode) + .map_err(|e| e.into()) } /// Fills the peeking buffer with the next token. From 5fb2e64341c4fc9da0f1cc6d3b9872ea008267d4 Mon Sep 17 00:00:00 2001 From: Paul Lancaster Date: Sat, 26 Sep 2020 11:47:06 +0100 Subject: [PATCH 3/3] Prevent with statement in strict mode --- boa/src/syntax/lexer/identifier.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs index 0d8081afbf8..b279ddf3b1f 100644 --- a/boa/src/syntax/lexer/identifier.rs +++ b/boa/src/syntax/lexer/identifier.rs @@ -4,7 +4,7 @@ use super::{Cursor, Error, Tokenizer}; use crate::{ profiler::BoaProfiler, syntax::{ - ast::{Position, Span}, + ast::{Keyword, Position, Span}, lexer::{Token, TokenKind}, }, }; @@ -68,6 +68,12 @@ impl Tokenizer for Identifier { "null" => TokenKind::NullLiteral, slice => { if let Ok(keyword) = slice.parse() { + if strict_mode && keyword == Keyword::With { + return Err(Error::Syntax( + "using 'with' statement not allowed in strict mode".into(), + start_pos, + )); + } TokenKind::Keyword(keyword) } else { if strict_mode && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {