Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix: regex starting with `/=` parsed as AssignDiv #4048

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/engine/src/tests/class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,12 @@ fn class_field_initializer_name() {
TestAction::assert_eq("c.c.name", js_str!("#c")),
]);
}

#[test]
fn class_superclass_from_regex_error() {
    // A `/=` in `extends` position must lex as the start of a regex literal
    // (not the AssignDiv punctuator); evaluation then fails because a RegExp
    // object is not a constructor.
    let actions = [TestAction::assert_native_error(
        "class A extends /=/ {}",
        crate::JsNativeErrorKind::Type,
        "superclass must be a constructor",
    )];
    run_test_actions(actions);
}
9 changes: 9 additions & 0 deletions core/engine/src/tests/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,15 @@ fn delete_in_strict_function_returned() {
)]);
}

#[test]
fn regex_slash_eq() {
    // Ensure `/=` at the start of an expression is lexed as a regex literal
    // whose body begins with `=`, rather than as the AssignDiv punctuator.
    let actions = [
        TestAction::assert_eq("+/=/", JsValue::nan()),
        TestAction::assert_eq("var a = 5; /=/; a", 5),
        TestAction::assert_eq("x = () => /=/;\n\"a=b\".match(x())[0]", js_str!("=")),
    ];
    run_test_actions(actions);
}

mod in_operator {
use super::*;

Expand Down
39 changes: 23 additions & 16 deletions core/parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,44 +112,51 @@ impl<R> Lexer<R> {
}
}

// Handles lexing of a token starting '/' with the '/' already being consumed.
// This could be a divide symbol or the start of a regex.
//
// A '/' symbol can always be a comment but if as tested above it is not then
// that means it could be multiple different tokens depending on the input token.
//
// As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
/// Handles lexing of a token starting '/' with the '/' already being consumed.
/// This could be a divide symbol or the start of a regex.
///
/// If `init_with_eq` is `true`, assume that '/=' has already been consumed.
///
/// A '/' symbol can always be a comment but if as tested above it is not then
/// that means it could be multiple different tokens depending on the input token.
///
/// As per <https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar>
pub(crate) fn lex_slash_token(
&mut self,
start: Position,
interner: &mut Interner,
init_with_eq: bool,
) -> Result<Token, Error>
where
R: ReadChar,
{
let _timer = Profiler::global().start_event("lex_slash_token", "Lexing");

if let Some(c) = self.cursor.peek_char()? {
match c {
match (c, init_with_eq) {
// /
0x002F => {
(0x002F, false) => {
self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/'
SingleLineComment.lex(&mut self.cursor, start, interner)
}
// *
0x002A => {
(0x002A, false) => {
self.cursor.next_char()?.expect("* token vanished"); // Consume the '*'
MultiLineComment.lex(&mut self.cursor, start, interner)
}
ch => {
(ch, init_with_eq) => {
match self.get_goal() {
InputElement::Div | InputElement::TemplateTail => {
// Only div punctuator allowed, regex not.

// =
if ch == 0x003D {
// Indicates this is an AssignDiv.
self.cursor.next_char()?.expect("= token vanished"); // Consume the '='
if init_with_eq || ch == 0x003D {
// if `=` is not consumed, consume it
if !init_with_eq {
// Indicates this is an AssignDiv.
// Consume the '='
self.cursor.next_char()?.expect("= token vanished");
}
Ok(Token::new(
Punctuator::AssignDiv.into(),
Span::new(start, self.cursor.pos()),
Expand All @@ -163,7 +170,7 @@ impl<R> Lexer<R> {
}
InputElement::RegExp | InputElement::HashbangOrRegExp => {
// Can be a regular expression.
RegexLiteral.lex(&mut self.cursor, start, interner)
RegexLiteral::new(init_with_eq).lex(&mut self.cursor, start, interner)
}
}
}
Expand Down Expand Up @@ -300,7 +307,7 @@ impl<R> Lexer<R> {
Span::new(start, self.cursor.pos()),
)),
'#' => PrivateIdentifier::new().lex(&mut self.cursor, start, interner),
'/' => self.lex_slash_token(start, interner),
'/' => self.lex_slash_token(start, interner, false),
#[cfg(feature = "annex-b")]
// <!--
'<' if !self.module()
Expand Down
17 changes: 16 additions & 1 deletion core/parser/src/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,18 @@ use std::str::{self, FromStr};
/// [spec]: https://tc39.es/ecma262/#sec-literals-regular-expression-literals
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
#[derive(Debug, Clone, Copy)]
pub(super) struct RegexLiteral;
pub(super) struct RegexLiteral {
// If there is more cases than only `/=`
// then use `Option<u8>` or (more correct) `Option<enum>`
init_with_eq: bool,
}

impl RegexLiteral {
/// `init_with_eq` is '=' after `/` already consumed?
pub(super) fn new(init_with_eq: bool) -> Self {
Self { init_with_eq }
}
}

impl<R> Tokenizer<R> for RegexLiteral {
fn lex(
Expand All @@ -38,6 +49,10 @@ impl<R> Tokenizer<R> for RegexLiteral {
let _timer = Profiler::global().start_event("RegexLiteral", "Lexing");

let mut body = Vec::new();
if self.init_with_eq {
body.push(u32::from(b'='));
}

let mut is_class_char = false;

// Lex RegularExpressionBody.
Expand Down
4 changes: 3 additions & 1 deletion core/parser/src/parser/cursor/buffered_lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,17 @@ where
}

/// Lexes the next tokens as a regex assuming that the starting '/' has already been consumed.
/// If `init_with_eq` is `true`, the starting '/=' is assumed to have been consumed instead.
pub(super) fn lex_regex(
    &mut self,
    start: Position,
    interner: &mut Interner,
    init_with_eq: bool,
) -> ParseResult<Token> {
    let _timer = Profiler::global().start_event("cursor::lex_regex()", "Parsing");

    // The regex goal symbol must be active before re-lexing the slash token.
    self.set_goal(InputElement::RegExp);

    let token = self.lexer.lex_slash_token(start, interner, init_with_eq);
    token.map_err(Into::into)
}

Expand Down
5 changes: 4 additions & 1 deletion core/parser/src/parser/cursor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,15 @@ where
self.buffered_lexer.set_goal(elm);
}

/// Lexes the next tokens as a regex assuming that the starting '/' has already been consumed.
/// If `init_with_eq` is `true`, the starting '/=' is assumed to have been consumed instead.
pub(super) fn lex_regex(
    &mut self,
    start: Position,
    interner: &mut Interner,
    init_with_eq: bool,
) -> ParseResult<Token> {
    // Delegate to the buffered lexer, which owns the goal-symbol state.
    self.buffered_lexer
        .lex_regex(start, interner, init_with_eq)
}

pub(super) fn lex_template(
Expand Down
6 changes: 4 additions & 2 deletions core/parser/src/parser/expression/primary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,12 @@ where
cursor.advance(interner);
Ok(node)
}
TokenKind::Punctuator(Punctuator::Div) => {
TokenKind::Punctuator(div @ (Punctuator::Div | Punctuator::AssignDiv)) => {
let init_with_eq = div == &Punctuator::AssignDiv;

let position = tok.span().start();
cursor.advance(interner);
let tok = cursor.lex_regex(position, interner)?;
let tok = cursor.lex_regex(position, interner, init_with_eq)?;

if let TokenKind::RegularExpressionLiteral(body, flags) = *tok.kind() {
Ok(AstRegExp::new(body, flags).into())
Expand Down
Loading