
Commit 6fc3e43

Fix RegExp literal parsing
JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com
1 parent f2c846d commit 6fc3e43
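For context on the one-line summary above: in ECMAScript source a '/' can either open a RegExp literal or act as the division operator, and the lexer has to choose based on the token that precedes it, even across line terminators. A minimal JavaScript sketch of that ambiguity (illustrative only, not part of the commit; assert is assumed to come from the jerry test harness, as in tests/jerry/*.js):

// Illustration of the '/' ambiguity this commit addresses; not part of the change itself.
assert (eval ("q = 8\n/ 2\n/ 2") === 2);           // '8' precedes '/': division, 8 / 2 / 2
assert (eval ("q = 8;\n/ 2 /.source") === " 2 ");  // ';' precedes '/': RegExp literal / 2 /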

File tree: 3 files changed, +74 -24 lines

jerry-core/parser/js/lexer.cpp

Lines changed: 30 additions & 24 deletions

@@ -23,7 +23,7 @@
 #include "lit-strings.h"
 #include "jsp-early-error.h"
 
-static token saved_token, prev_token, sent_token, empty_token;
+static token saved_token, prev_token, sent_token, empty_token, prev_non_lf_token;
 
 static bool allow_dump_lines = false, strict_mode;
 static size_t buffer_size = 0;
@@ -1090,22 +1090,23 @@ lexer_parse_regexp (void)
   token result;
   bool is_char_class = false;
 
-  /* Eat up '/' */
   JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH);
-  consume_char ();
   new_token ();
 
+  /* Eat up '/' */
+  consume_char ();
+
   while (true)
   {
-    ecma_char_t c = (ecma_char_t) LA (0);
-
-    if (c == LIT_CHAR_NULL)
+    if (lit_utf8_iterator_is_eos (&src_iter))
     {
-      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unclosed string", token_start_pos);
+      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unterminated RegExp literal", token_start_pos);
     }
-    else if (lit_char_is_line_terminator (c))
+
+    ecma_char_t c = (ecma_char_t) LA (0);
+    if (lit_char_is_line_terminator (c))
     {
-      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "RegExp literal shall not contain newline character", token_start_pos);
+      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "RegExp literal should not contain newline character", token_start_pos);
     }
     else if (c == LIT_CHAR_BACKSLASH)
     {
@@ -1140,16 +1141,15 @@ lexer_parse_regexp (void)
   {
     ecma_char_t c = (ecma_char_t) LA (0);
 
-    if (c == LIT_CHAR_NULL
-        || !lit_char_is_word_char (c)
-        || lit_char_is_line_terminator (c))
+    if (!lit_char_is_word_char (c) || lit_char_is_line_terminator (c))
     {
       break;
     }
+
     consume_char ();
   }
 
-  result = lexer_create_token_for_charset (TOK_REGEXP, TOK_START (), TOK_SIZE ());
+  result = lexer_create_token_for_charset (TOK_REGEXP, TOK_START () + 1, TOK_SIZE () - 1);
 
   is_token_parse_in_progress = false;
   return result;
@@ -1294,16 +1294,16 @@ lexer_parse_token (void)
   }
 
   if (c == LIT_CHAR_SLASH
-      && !(sent_token.type == TOK_NAME
-           || sent_token.type == TOK_NULL
-           || sent_token.type == TOK_BOOL
-           || sent_token.type == TOK_CLOSE_BRACE
-           || sent_token.type == TOK_CLOSE_SQUARE
-           || sent_token.type == TOK_CLOSE_PAREN
-           || sent_token.type == TOK_SMALL_INT
-           || sent_token.type == TOK_NUMBER
-           || sent_token.type == TOK_STRING
-           || sent_token.type == TOK_REGEXP))
+      && !(prev_non_lf_token.type == TOK_NAME
+           || prev_non_lf_token.type == TOK_NULL
+           || prev_non_lf_token.type == TOK_BOOL
+           || prev_non_lf_token.type == TOK_CLOSE_BRACE
+           || prev_non_lf_token.type == TOK_CLOSE_SQUARE
+           || prev_non_lf_token.type == TOK_CLOSE_PAREN
+           || prev_non_lf_token.type == TOK_SMALL_INT
+           || prev_non_lf_token.type == TOK_NUMBER
+           || prev_non_lf_token.type == TOK_STRING
+           || prev_non_lf_token.type == TOK_REGEXP))
   {
     return lexer_parse_regexp ();
   }
@@ -1506,6 +1506,10 @@ lexer_next_token (void)
   {
     dump_current_line ();
   }
+  else
+  {
+    prev_non_lf_token = sent_token;
+  }
 
 end:
   return sent_token;
@@ -1516,6 +1520,7 @@ lexer_save_token (token tok)
 {
   JERRY_ASSERT (is_empty (saved_token));
   saved_token = tok;
+  prev_non_lf_token = tok;
 }
 
 token
@@ -1531,6 +1536,7 @@ lexer_seek (lit_utf8_iterator_pos_t locus)
 
   lit_utf8_iterator_seek (&src_iter, locus);
   saved_token = empty_token;
+  prev_non_lf_token = empty_token;
 }
 
 /**
@@ -1833,7 +1839,7 @@ lexer_init (const jerry_api_char_t *source, /**< script source */
   empty_token.uid = 0;
   empty_token.loc = LIT_ITERATOR_POS_ZERO;
 
-  saved_token = prev_token = sent_token = empty_token;
+  saved_token = prev_token = sent_token = prev_non_lf_token = empty_token;
 
   if (!lit_is_utf8_string_valid (source, (lit_utf8_size_t) source_size))
   {
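The prev_non_lf_token check in lexer_parse_token above is the core of the fix: a slash is read as division only when the previous token that was not a line feed is a name, a literal, or a closing brace/bracket/paren; otherwise lexer_parse_regexp () is entered. A hedged JavaScript sketch of that rule as I read the diff (assert again assumed from the jerry test harness):

// ')' is one of the listed prev_non_lf_token types, so the '/' after it is division.
assert (eval ("(4)\n/ 2 / 2") === 1);
// ';' is not in the list, so here the '/' opens a RegExp literal instead.
assert (eval ("void 0;\n/ab/.source") === "ab");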

tests/jerry/arithmetics.js

Lines changed: 2 additions & 0 deletions

@@ -37,3 +37,5 @@ assert (1 / (-1 % -1) < 0);
 assert (1 / (-1 % 1) < 0);
 assert (1 / (1 % -1) > 0);
 assert (1 / (1 % 1) > 0);
+
+assert (eval ("x\n\n=\n\n6\n\n/\n\n3") === 2)
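The new arithmetics case only passes when the lexer looks past the blank lines and sees the number 6 before the slash. A contrasting sketch (hypothetical, not taken from the commit): when the previous non-newline token is '=', the same slash starts a RegExp literal instead.

// '=' is not a value-producing token, so '/3/' is lexed as a RegExp literal here (assert from the jerry test harness).
assert (eval ("y =\n\n/3/.source") === "3");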

tests/jerry/regexp-literal.js

Lines changed: 42 additions & 0 deletions

@@ -23,3 +23,45 @@ assert ("a"+/x/+"b" == "a/x/b");
 
 t = /\/\[[\]/]/.exec("/[/");
 assert (t == "/[/");
+
+t = /\u0000/.exec("\u0000");
+assert (t == "\u0000");
+
+try {
+  eval("/" + String.fromCharCode("0x0000") + "/");
+} catch (e) {
+  assert (false);
+}
+
+try {
+  eval("var x = 5\n\n/foo/");
+  assert(false);
+} catch (e) {
+  assert(e instanceof SyntaxError);
+}
+
+try {
+  eval("var x = 5;\n\n/foo/");
+} catch (e) {
+  assert(false);
+}
+
+try {
+  eval("for (;false;/abc/.exec(\"abc\")) {5}");
+} catch (e) {
+  assert(false);
+}
+
+try {
+  eval("var a = [] /foo/");
+  assert(false);
+} catch (e) {
+  assert(e instanceof SyntaxError);
+}
+
+try {
+  eval("/");
+  assert(false);
+} catch (e) {
+  assert(e instanceof SyntaxError);
+}
