Skip to content

Commit

Permalink
Lazy evaluation for cooked template string (#1103)
Browse files Browse the repository at this point in the history
* Lazy evaluate cooked template string

Fix octal escape in string literal


Add tests


Fix zero escape


Fix zero escape lookahead


Rename variables


Rename helper functions


Refactor match arms


Fix escape line terminator sequence


Fix single character escape


Fix line terminator and escape followed by unicode char


Add NonOctalDecimalEscapeSequence


Fix comment


Refactor


Modify error message


Add tests


Rename tests


Add test for error


Add comments for unsafe bytes to str


Update boa/src/syntax/lexer/string.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
Minor refactor


Remove unsafe bytes to str


Fix panic when reading invalid utf-8 chars


Refactor string literal


Support invalid utf-8 chars in string literal input


Add cook function for template literal


Fix line continuation bug


Add methods for utf16 buffer trait


Add trait comments


Add error message for template literal


Add and fix comments


Hide unused exported function and modify tests


Fix bug


Lazy evaluate cooked template string


Fix clippy


Fix test262 stack overflow issue


Fix invalid setting strict mode with template literal


Remove unnecessary cache


Remove

* Add comments

* Minor update
  • Loading branch information
jevancc authored Mar 26, 2021
1 parent ff3dd4e commit 6333daa
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 111 deletions.
15 changes: 12 additions & 3 deletions boa/src/syntax/ast/node/template/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,17 @@ impl fmt::Display for TemplateLit {
pub struct TaggedTemplate {
tag: Box<Node>,
raws: Vec<Box<str>>,
cookeds: Vec<Box<str>>,
cookeds: Vec<Option<Box<str>>>,
exprs: Vec<Node>,
}

impl TaggedTemplate {
pub fn new(tag: Node, raws: Vec<Box<str>>, cookeds: Vec<Box<str>>, exprs: Vec<Node>) -> Self {
pub fn new(
tag: Node,
raws: Vec<Box<str>>,
cookeds: Vec<Option<Box<str>>>,
exprs: Vec<Node>,
) -> Self {
Self {
tag: Box::new(tag),
raws,
Expand All @@ -96,7 +101,11 @@ impl Executable for TaggedTemplate {
}

for (i, cooked) in self.cookeds.iter().enumerate() {
template_object.set_field(i, Value::from(cooked), context)?;
if let Some(cooked) = cooked {
template_object.set_field(i, Value::from(cooked), context)?;
} else {
template_object.set_field(i, Value::undefined(), context)?;
}
}
template_object.set_field("raw", raw_array, context)?;

Expand Down
124 changes: 78 additions & 46 deletions boa/src/syntax/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,80 @@ use crate::{
};
use std::io::{self, ErrorKind, Read};

#[cfg(feature = "deser")]
use serde::{Deserialize, Serialize};

/// Raw source text of one template-literal span, stored together with the position
/// where that span starts so the cooked form can be computed later on demand.
#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug)]
pub struct TemplateString {
/// The start position of the template string. It seeds the cursor that re-scans `raw`
/// in `to_owned_cooked` — presumably so that a cooking failure is reported as a lexer
/// error at the right source location (confirm against `Cursor::with_position`).
start_pos: Position,
/// The template string as produced by the spec's `TemplateStrings` with argument `raw`
/// set to true, i.e. the text before escape sequences are processed.
raw: Box<str>,
}

impl TemplateString {
    /// Builds a `TemplateString` from the raw text of a template span and the
    /// position at which that span starts.
    pub fn new<R>(raw: R, start_pos: Position) -> Self
    where
        R: Into<Box<str>>,
    {
        let raw = raw.into();
        Self { start_pos, raw }
    }

    /// Borrows the raw (uncooked) template string as an immutable string slice.
    ///
    /// More information:
    ///  - [ECMAScript reference][spec]
    ///
    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
    pub fn as_raw(&self) -> &str {
        &self.raw
    }

    /// Creates a new cooked template string by re-scanning the raw text and
    /// resolving its escape sequences. Returns a lexer error if cooking fails.
    ///
    /// More information:
    ///  - [ECMAScript reference][spec]
    ///
    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
    pub fn to_owned_cooked(&self) -> Result<Box<str>, Error> {
        let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos);
        let mut cooked: Vec<u16> = Vec::new();

        // Position of the character about to be read; handed to the escape
        // parser as the start of the escape sequence.
        let mut char_start = cursor.pos();

        while let Some(code_point) = cursor.next_char()? {
            if code_point == 0x005C /* \ */ {
                // Arguments after the start position: the first `true` sits in the
                // slot where a strict-mode flag is passed elsewhere in this module;
                // the second is presumably an "is template literal" flag — confirm
                // against `StringLiteral`.
                let escaped = StringLiteral::take_escape_sequence_or_line_continuation(
                    &mut cursor,
                    char_start,
                    true,
                    true,
                )?;

                // `None` means nothing is contributed to the cooked value
                // (e.g. a line continuation).
                if let Some(value) = escaped {
                    cooked.push_code_point(value);
                }
            } else {
                // The caller guarantees that sequences '`' and '${' never appear
                // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
                // which matches the TV of <CR> <LF>
                cooked.push_code_point(code_point);
            }

            char_start = cursor.pos();
        }

        Ok(cooked.to_string_lossy().into())
    }
}

/// Template literal lexing.
///
/// Expects: Initial ` to already be consumed by cursor.
Expand Down Expand Up @@ -43,21 +117,19 @@ impl<R> Tokenizer<R> for TemplateLiteral {
match ch {
0x0060 /* ` */ => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
let template_string = TemplateString::new(raw, start_pos);

return Ok(Token::new(
TokenKind::template_no_substitution(raw, cooked),
TokenKind::template_no_substitution(template_string),
Span::new(start_pos, cursor.pos()),
));
}
0x0024 /* $ */ if cursor.next_is(b'{')? => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
let template_string = TemplateString::new(raw, start_pos);

return Ok(Token::new(
TokenKind::template_middle(raw, cooked),
TokenKind::template_middle(template_string),
Span::new(start_pos, cursor.pos()),
));
}
Expand All @@ -82,43 +154,3 @@ impl<R> Tokenizer<R> for TemplateLiteral {
}
}
}

impl TemplateLiteral {
    /// Cooks a raw template string: re-scans `raw` starting at `start_pos` and
    /// resolves escape sequences, honouring `is_strict_mode`.
    ///
    /// Returns the cooked string, or a lexer error if an escape sequence cannot
    /// be processed.
    fn cook_template_string(
        raw: &str,
        start_pos: Position,
        is_strict_mode: bool,
    ) -> Result<String, Error> {
        let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos);
        let mut cooked: Vec<u16> = Vec::new();

        // Position of the character about to be read; handed to the escape
        // parser as the start of the escape sequence.
        let mut char_start = cursor.pos();

        while let Some(code_point) = cursor.next_char()? {
            if code_point == 0x005C /* \ */ {
                let escaped = StringLiteral::take_escape_sequence_or_line_continuation(
                    &mut cursor,
                    char_start,
                    is_strict_mode,
                    true,
                )?;

                // `None` means nothing is contributed to the cooked value
                // (e.g. a line continuation).
                if let Some(value) = escaped {
                    cooked.push_code_point(value);
                }
            } else {
                // The caller guarantees that sequences '`' and '${' never appear
                // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
                // which matches the TV of <CR> <LF>
                cooked.push_code_point(code_point);
            }

            char_start = cursor.pos();
        }

        Ok(cooked.to_string_lossy())
    }
}
6 changes: 5 additions & 1 deletion boa/src/syntax/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use super::token::Numeric;
use super::*;
use super::{Error, Position};
use crate::syntax::ast::Keyword;
use crate::syntax::lexer::template::TemplateString;
use std::str;

fn span(start: (u32, u32), end: (u32, u32)) -> Span {
Expand Down Expand Up @@ -136,7 +137,10 @@ fn check_template_literal_simple() {

assert_eq!(
lexer.next().unwrap().unwrap().kind(),
&TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal")
&TokenKind::template_no_substitution(TemplateString::new(
"I'm a template literal",
Position::new(1, 1)
))
);
}

Expand Down
42 changes: 9 additions & 33 deletions boa/src/syntax/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use super::regex::RegExpFlags;
use crate::{
builtins::BigInt,
syntax::ast::{Keyword, Punctuator, Span},
syntax::lexer::template::TemplateString,
};

use std::fmt::{self, Debug, Display, Formatter};

#[cfg(feature = "deser")]
Expand Down Expand Up @@ -126,20 +126,10 @@ pub enum TokenKind {
StringLiteral(Box<str>),

/// A part of a template literal without substitution.
TemplateNoSubstitution {
/// The string as it has been entered, without processing escape sequences.
raw: Box<str>,
/// The raw string with escape sequences processed.
cooked: Box<str>,
},
TemplateNoSubstitution(TemplateString),

/// The part of a template literal between substitutions
TemplateMiddle {
/// The string as it has been entered, without processing escape sequences.
raw: Box<str>,
/// The raw string with escape sequences processed.
cooked: Box<str>,
},
TemplateMiddle(TemplateString),

/// A regular expression, consisting of body and flags.
RegularExpressionLiteral(Box<str>, RegExpFlags),
Expand Down Expand Up @@ -220,26 +210,12 @@ impl TokenKind {
Self::StringLiteral(lit.into())
}

pub fn template_middle<R, C>(raw: R, cooked: C) -> Self
where
R: Into<Box<str>>,
C: Into<Box<str>>,
{
Self::TemplateMiddle {
raw: raw.into(),
cooked: cooked.into(),
}
pub fn template_middle(template_string: TemplateString) -> Self {
Self::TemplateMiddle(template_string)
}

pub fn template_no_substitution<R, C>(raw: R, cooked: C) -> Self
where
R: Into<Box<str>>,
C: Into<Box<str>>,
{
Self::TemplateNoSubstitution {
raw: raw.into(),
cooked: cooked.into(),
}
pub fn template_no_substitution(template_string: TemplateString) -> Self {
Self::TemplateNoSubstitution(template_string)
}

/// Creates a `RegularExpressionLiteral` token kind.
Expand Down Expand Up @@ -275,8 +251,8 @@ impl Display for TokenKind {
Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num),
Self::Punctuator(ref punc) => write!(f, "{}", punc),
Self::StringLiteral(ref lit) => write!(f, "{}", lit),
Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked),
Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked),
Self::TemplateNoSubstitution(ref ts) => write!(f, "{}", ts.as_raw()),
Self::TemplateMiddle(ref ts) => write!(f, "{}", ts.as_raw()),
Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags),
Self::LineTerminator => write!(f, "line terminator"),
Self::Comment => write!(f, "comment"),
Expand Down
12 changes: 6 additions & 6 deletions boa/src/syntax/parser/expression/left_hand_side/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ where

loop {
match token.kind() {
TokenKind::TemplateMiddle { raw, cooked } => {
raws.push(raw.clone());
cookeds.push(cooked.clone());
TokenKind::TemplateMiddle(template_string) => {
raws.push(template_string.as_raw().to_owned().into_boxed_str());
cookeds.push(template_string.to_owned_cooked().ok());
exprs.push(
Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
);
Expand All @@ -70,9 +70,9 @@ where
"template literal",
)?;
}
TokenKind::TemplateNoSubstitution { raw, cooked } => {
raws.push(raw.clone());
cookeds.push(cooked.clone());
TokenKind::TemplateNoSubstitution(template_string) => {
raws.push(template_string.as_raw().to_owned().into_boxed_str());
cookeds.push(template_string.to_owned_cooked().ok());
return Ok(Node::from(TaggedTemplate::new(
self.tag, raws, cookeds, exprs,
)));
Expand Down
12 changes: 8 additions & 4 deletions boa/src/syntax/parser/expression/primary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ where
TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()),
TokenKind::NullLiteral => Ok(Const::Null.into()),
TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference
TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => {
Ok(Const::from(s.as_ref()).into())
TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()),
TokenKind::TemplateNoSubstitution(template_string) => {
Ok(Const::from(template_string.to_owned_cooked().map_err(ParseError::lex)?).into())
}
TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()),
TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()),
Expand Down Expand Up @@ -138,11 +139,14 @@ where
Err(ParseError::unexpected(tok, "regular expression literal"))
}
}
TokenKind::TemplateMiddle { cooked, .. } => TemplateLiteral::new(
TokenKind::TemplateMiddle(template_string) => TemplateLiteral::new(
self.allow_yield,
self.allow_await,
tok.span().start(),
cooked.as_ref(),
template_string
.to_owned_cooked()
.map_err(ParseError::lex)?
.as_ref(),
)
.parse(cursor)
.map(Node::TemplateLit),
Expand Down
16 changes: 8 additions & 8 deletions boa/src/syntax/parser/expression/primary/template/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ where

loop {
match cursor.lex_template(self.start)?.kind() {
TokenKind::TemplateMiddle {
cooked: template, ..
} => {
elements.push(TemplateElement::String(template.to_owned()));
TokenKind::TemplateMiddle(template_string) => {
let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;

elements.push(TemplateElement::String(cooked));
elements.push(TemplateElement::Expr(
Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
));
Expand All @@ -86,10 +86,10 @@ where
"template literal",
)?;
}
TokenKind::TemplateNoSubstitution {
cooked: template, ..
} => {
elements.push(TemplateElement::String(template.to_owned()));
TokenKind::TemplateNoSubstitution(template_string) => {
let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;

elements.push(TemplateElement::String(cooked));
return Ok(TemplateLit::new(elements));
}
_ => {
Expand Down
7 changes: 2 additions & 5 deletions boa/src/syntax/parser/function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,8 @@ where
TokenKind::Punctuator(Punctuator::CloseBlock) => {
return Ok(Vec::new().into());
}
TokenKind::StringLiteral(string)
| TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
if string == &"use strict".into() {
cursor.set_strict_mode(true);
}
TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
cursor.set_strict_mode(true);
}
_ => {}
}
Expand Down
7 changes: 2 additions & 5 deletions boa/src/syntax/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,8 @@ where
match cursor.peek(0)? {
Some(tok) => {
match tok.kind() {
TokenKind::StringLiteral(string)
| TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
if string.as_ref() == "use strict" {
cursor.set_strict_mode(true);
}
TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
cursor.set_strict_mode(true);
}
_ => {}
}
Expand Down

0 comments on commit 6333daa

Please sign in to comment.