From b3389b7d6724cb7ea5b9e25af88ccc54e8a84547 Mon Sep 17 00:00:00 2001 From: Paul Lancaster Date: Sat, 26 Sep 2020 11:00:15 +0100 Subject: [PATCH] Add strict mode to lex trait --- boa/src/syntax/lexer/comment.rs | 4 ++-- boa/src/syntax/lexer/identifier.rs | 2 +- boa/src/syntax/lexer/mod.rs | 24 +++++++++---------- boa/src/syntax/lexer/number.rs | 13 +++++----- boa/src/syntax/lexer/operator.rs | 2 +- boa/src/syntax/lexer/regex.rs | 2 +- boa/src/syntax/lexer/spread.rs | 2 +- boa/src/syntax/lexer/string.rs | 2 +- boa/src/syntax/lexer/template.rs | 2 +- .../parser/cursor/buffered_lexer/mod.rs | 4 +++- 10 files changed, 29 insertions(+), 28 deletions(-) diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs index cedd084ca88..0d50b6294cb 100644 --- a/boa/src/syntax/lexer/comment.rs +++ b/boa/src/syntax/lexer/comment.rs @@ -23,7 +23,7 @@ use std::io::Read; pub(super) struct SingleLineComment; impl Tokenizer for SingleLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { @@ -58,7 +58,7 @@ impl Tokenizer for SingleLineComment { pub(super) struct MultiLineComment; impl Tokenizer for MultiLineComment { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs index 15dfecbb7cd..c3d70c01155 100644 --- a/boa/src/syntax/lexer/identifier.rs +++ b/boa/src/syntax/lexer/identifier.rs @@ -31,7 +31,7 @@ impl Identifier { } impl Tokenizer for Identifier { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index 4fd8f75c3ab..937733352d7 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -48,7 +48,7 @@ pub use token::{Token, TokenKind}; trait Tokenizer { /// Lexes the next token. - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read; } @@ -109,7 +109,7 @@ impl Lexer { // that means it could be multiple different tokens depending on the input token. // // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar - pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result + pub(crate) fn lex_slash_token(&mut self, start: Position, strict_mode: bool) -> Result where R: Read, { @@ -119,11 +119,11 @@ impl Lexer { match c { '/' => { self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/' - SingleLineComment.lex(&mut self.cursor, start) + SingleLineComment.lex(&mut self.cursor, start, strict_mode) } '*' => { self.cursor.next_char()?.expect("* token vanished"); // Consume the '*' - MultiLineComment.lex(&mut self.cursor, start) + MultiLineComment.lex(&mut self.cursor, start, strict_mode) } ch => { match self.get_goal() { @@ -146,7 +146,7 @@ impl Lexer { } InputElement::RegExp | InputElement::RegExpOrTemplateTail => { // Can be a regular expression. - RegexLiteral.lex(&mut self.cursor, start) + RegexLiteral.lex(&mut self.cursor, start, strict_mode) } } } @@ -188,13 +188,13 @@ impl Lexer { TokenKind::LineTerminator, Span::new(start, self.cursor.pos()), )), - '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start), - '`' => TemplateLiteral.lex(&mut self.cursor, start), + '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode), + '`' => TemplateLiteral.lex(&mut self.cursor, start, strict_mode), _ if next_chr.is_digit(10) => { - NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start) + NumberLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode) } _ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => { - Identifier::new(next_chr).lex(&mut self.cursor, start) + Identifier::new(next_chr).lex(&mut self.cursor, start, strict_mode) } ';' => Ok(Token::new( Punctuator::Semicolon.into(), @@ -204,7 +204,7 @@ impl Lexer { Punctuator::Colon.into(), Span::new(start, self.cursor.pos()), )), - '.' => SpreadLiteral::new().lex(&mut self.cursor, start), + '.' => SpreadLiteral::new().lex(&mut self.cursor, start, strict_mode), '(' => Ok(Token::new( Punctuator::OpenParen.into(), Span::new(start, self.cursor.pos()), @@ -237,9 +237,9 @@ impl Lexer { Punctuator::Question.into(), Span::new(start, self.cursor.pos()), )), - '/' => self.lex_slash_token(start), + '/' => self.lex_slash_token(start, strict_mode), '=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' => { - Operator::new(next_chr).lex(&mut self.cursor, start) + Operator::new(next_chr).lex(&mut self.cursor, start, strict_mode) } _ => { let details = format!( diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs index 8391a3ee12f..e4cb5a71f59 100644 --- a/boa/src/syntax/lexer/number.rs +++ b/boa/src/syntax/lexer/number.rs @@ -23,14 +23,13 @@ use std::{io::Read, str::FromStr}; /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Number_type #[derive(Debug, Clone, Copy)] pub(super) struct NumberLiteral { - init: char, - strict_mode: bool, + init: char } impl NumberLiteral { /// Creates a new string literal lexer. - pub(super) fn new(init: char, strict_mode: bool) -> Self { - Self { init, strict_mode } + pub(super) fn new(init: char) -> Self { + Self { init } } } @@ -135,7 +134,7 @@ where } impl Tokenizer for NumberLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { @@ -187,7 +186,7 @@ impl Tokenizer for NumberLiteral { ch => { if ch.is_digit(8) { // LegacyOctalIntegerLiteral - if self.strict_mode { + if strict_mode { // LegacyOctalIntegerLiteral is forbidden with strict mode true. return Err(Error::syntax( "implicit octal literals are not allowed in strict mode", @@ -205,7 +204,7 @@ impl Tokenizer for NumberLiteral { // Indicates a numerical digit comes after then 0 but it isn't an octal digit // so therefore this must be a number with an unneeded leading 0. This is // forbidden in strict mode. - if self.strict_mode { + if strict_mode { return Err(Error::syntax( "leading 0's are not allowed in strict mode", start_pos, diff --git a/boa/src/syntax/lexer/operator.rs b/boa/src/syntax/lexer/operator.rs index 5aa72c7d559..052039f3f10 100644 --- a/boa/src/syntax/lexer/operator.rs +++ b/boa/src/syntax/lexer/operator.rs @@ -93,7 +93,7 @@ impl Operator { } impl Tokenizer for Operator { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/regex.rs b/boa/src/syntax/lexer/regex.rs index 2367c44d70f..472cd26d6c3 100644 --- a/boa/src/syntax/lexer/regex.rs +++ b/boa/src/syntax/lexer/regex.rs @@ -33,7 +33,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; pub(super) struct RegexLiteral; impl Tokenizer for RegexLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/spread.rs b/boa/src/syntax/lexer/spread.rs index cc8e0ad36f9..83a2581c521 100644 --- a/boa/src/syntax/lexer/spread.rs +++ b/boa/src/syntax/lexer/spread.rs @@ -31,7 +31,7 @@ impl SpreadLiteral { } impl Tokenizer for SpreadLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index 87b999664c8..30e79299c5c 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -51,7 +51,7 @@ enum StringTerminator { } impl Tokenizer for StringLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index c51763c7f33..72b5c678ee2 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -24,7 +24,7 @@ use std::io::{self, ErrorKind, Read}; pub(super) struct TemplateLiteral; impl Tokenizer for TemplateLiteral { - fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position, strict_mode: bool) -> Result where R: Read, { diff --git a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs index bed01da9ef0..f7dce89e38a 100644 --- a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs +++ b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs @@ -79,7 +79,9 @@ where pub(super) fn lex_regex(&mut self, start: Position) -> Result { let _timer = BoaProfiler::global().start_event("cursor::lex_regex()", "Parsing"); self.set_goal(InputElement::RegExp); - self.lexer.lex_slash_token(start).map_err(|e| e.into()) + + let strict_mode: bool = false; // TODO enable setting strict mode on/off. + self.lexer.lex_slash_token(start, strict_mode).map_err(|e| e.into()) } /// Fills the peeking buffer with the next token.