From 6333daae3fdc0949a3f48e16e3d88266f1c4fa50 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Fri, 26 Mar 2021 06:45:32 -0700 Subject: [PATCH] Lazy evaluation for cooked template string (#1103) * Lazy evaluate cooked template string Fix octal escape in string literal Add tests Fix zero escape Fix zero escape lookahead Rename variables Rename helper functions Refactor match arms Fix escape line terminator sequence Fix single character escape Fix line terminator and escape followed by unicode char Add NonOctalDecimalEscapeSequence Fix comment Refactor Modify error message Add tests Rename tests Add test for error Add comments for unsafe bytes to str Update boa/src/syntax/lexer/string.rs Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> Minor refactor Remove unsafe bytes to str Fix panic when reading invalid utf-8 chars Refactor string literal Support invalid utf-8 chars in string literal input Add cook function for template literal Fix line continuation bug Add methods for utf16 buffer trait Add trait comments Add error message for template literal Add and fix comments Hide unused exported function and modify tests Fix bug Lazy evaluate cooked template string Fix clippy Fix test262 stack overflow issue Fix invalid setting strict mode with template literal Remove unnecessary cache Remove * Add comments * Minor update --- boa/src/syntax/ast/node/template/mod.rs | 15 ++- boa/src/syntax/lexer/template.rs | 124 +++++++++++------- boa/src/syntax/lexer/tests.rs | 6 +- boa/src/syntax/lexer/token.rs | 42 ++---- .../expression/left_hand_side/template.rs | 12 +- .../syntax/parser/expression/primary/mod.rs | 12 +- .../parser/expression/primary/template/mod.rs | 16 +-- boa/src/syntax/parser/function/mod.rs | 7 +- boa/src/syntax/parser/mod.rs | 7 +- 9 files changed, 130 insertions(+), 111 deletions(-) diff --git a/boa/src/syntax/ast/node/template/mod.rs b/boa/src/syntax/ast/node/template/mod.rs index 2e6a747a848..5ab8f321692 100644 --- 
a/boa/src/syntax/ast/node/template/mod.rs +++ b/boa/src/syntax/ast/node/template/mod.rs @@ -69,12 +69,17 @@ impl fmt::Display for TemplateLit { pub struct TaggedTemplate { tag: Box, raws: Vec>, - cookeds: Vec>, + cookeds: Vec>>, exprs: Vec, } impl TaggedTemplate { - pub fn new(tag: Node, raws: Vec>, cookeds: Vec>, exprs: Vec) -> Self { + pub fn new( + tag: Node, + raws: Vec>, + cookeds: Vec>>, + exprs: Vec, + ) -> Self { Self { tag: Box::new(tag), raws, @@ -96,7 +101,11 @@ impl Executable for TaggedTemplate { } for (i, cooked) in self.cookeds.iter().enumerate() { - template_object.set_field(i, Value::from(cooked), context)?; + if let Some(cooked) = cooked { + template_object.set_field(i, Value::from(cooked), context)?; + } else { + template_object.set_field(i, Value::undefined(), context)?; + } } template_object.set_field("raw", raw_array, context)?; diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index 9636d1c849b..76aedeaa7a4 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -11,6 +11,80 @@ use crate::{ }; use std::io::{self, ErrorKind, Read}; +#[cfg(feature = "deser")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[derive(Clone, PartialEq, Debug)] +pub struct TemplateString { + /// The start position of the template string. Used to build a lexer error if `to_owned_cooked` fails. + start_pos: Position, + /// The template string of template literal with argument `raw` true. + raw: Box, +} + +impl TemplateString { + pub fn new(raw: R, start_pos: Position) -> Self + where + R: Into>, + { + Self { + start_pos, + raw: raw.into(), + } + } + + /// Returns the raw template string as a string slice.
+ /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings + pub fn as_raw(&self) -> &str { + self.raw.as_ref() + } + + /// Creates a new cooked template string. Returns a lexer error if it fails to cook the template string. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings + pub fn to_owned_cooked(&self) -> Result, Error> { + let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos); + let mut buf: Vec = Vec::new(); + + loop { + let ch_start_pos = cursor.pos(); + let ch = cursor.next_char()?; + + match ch { + Some(0x005C /* \ */) => { + let escape_value = StringLiteral::take_escape_sequence_or_line_continuation( + &mut cursor, + ch_start_pos, + true, + true, + )?; + + if let Some(escape_value) = escape_value { + buf.push_code_point(escape_value); + } + } + Some(ch) => { + // The caller guarantees that sequences '`' and '${' never appear + // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>, + // which matches the TV of <CR> <LF> + buf.push_code_point(ch); + } + None => break, + } + } + + Ok(buf.to_string_lossy().into()) + } +} + /// Template literal lexing. /// /// Expects: Initial ` to already be consumed by cursor. @@ -43,21 +117,19 @@ impl Tokenizer for TemplateLiteral { match ch { 0x0060 /* ` */ => { let raw = buf.to_string_lossy(); - // TODO: Cook the raw string only when needed (lazy evaluation) - let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + let template_string = TemplateString::new(raw, start_pos); return Ok(Token::new( - TokenKind::template_no_substitution(raw, cooked), + TokenKind::template_no_substitution(template_string), Span::new(start_pos, cursor.pos()), )); } 0x0024 /* $ */ if cursor.next_is(b'{')?
=> { let raw = buf.to_string_lossy(); - // TODO: Cook the raw string only when needed (lazy evaluation) - let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + let template_string = TemplateString::new(raw, start_pos); return Ok(Token::new( - TokenKind::template_middle(raw, cooked), + TokenKind::template_middle(template_string), Span::new(start_pos, cursor.pos()), )); } @@ -82,43 +154,3 @@ impl Tokenizer for TemplateLiteral { } } } - -impl TemplateLiteral { - fn cook_template_string( - raw: &str, - start_pos: Position, - is_strict_mode: bool, - ) -> Result { - let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos); - let mut buf: Vec = Vec::new(); - - loop { - let ch_start_pos = cursor.pos(); - let ch = cursor.next_char()?; - - match ch { - Some(0x005C /* \ */) => { - if let Some(escape_value) = - StringLiteral::take_escape_sequence_or_line_continuation( - &mut cursor, - ch_start_pos, - is_strict_mode, - true, - )? - { - buf.push_code_point(escape_value); - } - } - Some(ch) => { - // The caller guarantees that sequences '`' and '${' never appear - // LineTerminatorSequence is consumed by `cursor.next_char()` and returns , - // which matches the TV of - buf.push_code_point(ch); - } - None => break, - } - } - - Ok(buf.to_string_lossy()) - } -} diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 2cafc5c9000..eb6c4f71a2a 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -6,6 +6,7 @@ use super::token::Numeric; use super::*; use super::{Error, Position}; use crate::syntax::ast::Keyword; +use crate::syntax::lexer::template::TemplateString; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -136,7 +137,10 @@ fn check_template_literal_simple() { assert_eq!( lexer.next().unwrap().unwrap().kind(), - &TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal") + &TokenKind::template_no_substitution(TemplateString::new( + "I'm a 
template literal", + Position::new(1, 1) + )) ); } diff --git a/boa/src/syntax/lexer/token.rs b/boa/src/syntax/lexer/token.rs index 82a4f2e9848..e2042c2f33e 100644 --- a/boa/src/syntax/lexer/token.rs +++ b/boa/src/syntax/lexer/token.rs @@ -10,8 +10,8 @@ use super::regex::RegExpFlags; use crate::{ builtins::BigInt, syntax::ast::{Keyword, Punctuator, Span}, + syntax::lexer::template::TemplateString, }; - use std::fmt::{self, Debug, Display, Formatter}; #[cfg(feature = "deser")] @@ -126,20 +126,10 @@ pub enum TokenKind { StringLiteral(Box), /// A part of a template literal without substitution. - TemplateNoSubstitution { - /// The string as it has been entered, without processing escape sequences. - raw: Box, - /// The raw string with escape sequences processed. - cooked: Box, - }, + TemplateNoSubstitution(TemplateString), /// The part of a template literal between substitutions - TemplateMiddle { - /// The string as it has been entered, without processing escape sequences. - raw: Box, - /// The raw string with escape sequences processed. - cooked: Box, - }, + TemplateMiddle(TemplateString), /// A regular expression, consisting of body and flags. RegularExpressionLiteral(Box, RegExpFlags), @@ -220,26 +210,12 @@ impl TokenKind { Self::StringLiteral(lit.into()) } - pub fn template_middle(raw: R, cooked: C) -> Self - where - R: Into>, - C: Into>, - { - Self::TemplateMiddle { - raw: raw.into(), - cooked: cooked.into(), - } + pub fn template_middle(template_string: TemplateString) -> Self { + Self::TemplateMiddle(template_string) } - pub fn template_no_substitution(raw: R, cooked: C) -> Self - where - R: Into>, - C: Into>, - { - Self::TemplateNoSubstitution { - raw: raw.into(), - cooked: cooked.into(), - } + pub fn template_no_substitution(template_string: TemplateString) -> Self { + Self::TemplateNoSubstitution(template_string) } /// Creates a `RegularExpressionLiteral` token kind. 
@@ -275,8 +251,8 @@ impl Display for TokenKind { Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num), Self::Punctuator(ref punc) => write!(f, "{}", punc), Self::StringLiteral(ref lit) => write!(f, "{}", lit), - Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked), - Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked), + Self::TemplateNoSubstitution(ref ts) => write!(f, "{}", ts.as_raw()), + Self::TemplateMiddle(ref ts) => write!(f, "{}", ts.as_raw()), Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags), Self::LineTerminator => write!(f, "line terminator"), Self::Comment => write!(f, "comment"), diff --git a/boa/src/syntax/parser/expression/left_hand_side/template.rs b/boa/src/syntax/parser/expression/left_hand_side/template.rs index 0c5d78e374d..a08194ec98f 100644 --- a/boa/src/syntax/parser/expression/left_hand_side/template.rs +++ b/boa/src/syntax/parser/expression/left_hand_side/template.rs @@ -59,9 +59,9 @@ where loop { match token.kind() { - TokenKind::TemplateMiddle { raw, cooked } => { - raws.push(raw.clone()); - cookeds.push(cooked.clone()); + TokenKind::TemplateMiddle(template_string) => { + raws.push(template_string.as_raw().to_owned().into_boxed_str()); + cookeds.push(template_string.to_owned_cooked().ok()); exprs.push( Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, ); @@ -70,9 +70,9 @@ where "template literal", )?; } - TokenKind::TemplateNoSubstitution { raw, cooked } => { - raws.push(raw.clone()); - cookeds.push(cooked.clone()); + TokenKind::TemplateNoSubstitution(template_string) => { + raws.push(template_string.as_raw().to_owned().into_boxed_str()); + cookeds.push(template_string.to_owned_cooked().ok()); return Ok(Node::from(TaggedTemplate::new( self.tag, raws, cookeds, exprs, ))); diff --git a/boa/src/syntax/parser/expression/primary/mod.rs b/boa/src/syntax/parser/expression/primary/mod.rs index 16183f002ec..20ec3c8656e 100644 --- 
a/boa/src/syntax/parser/expression/primary/mod.rs +++ b/boa/src/syntax/parser/expression/primary/mod.rs @@ -107,8 +107,9 @@ where TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()), TokenKind::NullLiteral => Ok(Const::Null.into()), TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference - TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => { - Ok(Const::from(s.as_ref()).into()) + TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()), + TokenKind::TemplateNoSubstitution(template_string) => { + Ok(Const::from(template_string.to_owned_cooked().map_err(ParseError::lex)?).into()) } TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()), TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()), @@ -138,11 +139,14 @@ where Err(ParseError::unexpected(tok, "regular expression literal")) } } - TokenKind::TemplateMiddle { cooked, .. } => TemplateLiteral::new( + TokenKind::TemplateMiddle(template_string) => TemplateLiteral::new( self.allow_yield, self.allow_await, tok.span().start(), - cooked.as_ref(), + template_string + .to_owned_cooked() + .map_err(ParseError::lex)? + .as_ref(), ) .parse(cursor) .map(Node::TemplateLit), diff --git a/boa/src/syntax/parser/expression/primary/template/mod.rs b/boa/src/syntax/parser/expression/primary/template/mod.rs index ab03356cfb6..cdfed7ea4fa 100644 --- a/boa/src/syntax/parser/expression/primary/template/mod.rs +++ b/boa/src/syntax/parser/expression/primary/template/mod.rs @@ -74,10 +74,10 @@ where loop { match cursor.lex_template(self.start)?.kind() { - TokenKind::TemplateMiddle { - cooked: template, .. 
- } => { - elements.push(TemplateElement::String(template.to_owned())); + TokenKind::TemplateMiddle(template_string) => { + let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?; + + elements.push(TemplateElement::String(cooked)); elements.push(TemplateElement::Expr( Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, )); @@ -86,10 +86,10 @@ where "template literal", )?; } - TokenKind::TemplateNoSubstitution { - cooked: template, .. - } => { - elements.push(TemplateElement::String(template.to_owned())); + TokenKind::TemplateNoSubstitution(template_string) => { + let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?; + + elements.push(TemplateElement::String(cooked)); return Ok(TemplateLit::new(elements)); } _ => { diff --git a/boa/src/syntax/parser/function/mod.rs b/boa/src/syntax/parser/function/mod.rs index 8340fe79640..ee3463cef3e 100644 --- a/boa/src/syntax/parser/function/mod.rs +++ b/boa/src/syntax/parser/function/mod.rs @@ -269,11 +269,8 @@ where TokenKind::Punctuator(Punctuator::CloseBlock) => { return Ok(Vec::new().into()); } - TokenKind::StringLiteral(string) - | TokenKind::TemplateNoSubstitution { cooked: string, .. } => { - if string == &"use strict".into() { - cursor.set_strict_mode(true); - } + TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => { + cursor.set_strict_mode(true); } _ => {} } diff --git a/boa/src/syntax/parser/mod.rs b/boa/src/syntax/parser/mod.rs index d33f6ea15d5..f6b677a77cb 100644 --- a/boa/src/syntax/parser/mod.rs +++ b/boa/src/syntax/parser/mod.rs @@ -125,11 +125,8 @@ where match cursor.peek(0)? { Some(tok) => { match tok.kind() { - TokenKind::StringLiteral(string) - | TokenKind::TemplateNoSubstitution { cooked: string, .. } => { - if string.as_ref() == "use strict" { - cursor.set_strict_mode(true); - } + TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => { + cursor.set_strict_mode(true); } _ => {} }