Skip to content

Commit

Permalink
Add tests for RegExp.escape
Browse files Browse the repository at this point in the history
  • Loading branch information
leobalter committed Jun 29, 2024
1 parent d2596e2 commit a74928b
Show file tree
Hide file tree
Showing 22 changed files with 800 additions and 0 deletions.
4 changes: 4 additions & 0 deletions features.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ String.prototype.toWellFormed
# https://github.com/tc39/proposal-json-parse-with-source
json-parse-with-source

# RegExp.escape
# https://github.com/tc39/proposal-regex-escaping
RegExp.escape

# Regular expression modifiers
# https://github.com/tc39/proposal-regexp-modifiers
regexp-modifiers
Expand Down
17 changes: 17 additions & 0 deletions test/built-ins/RegExp/escape/cross-realm.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-regexp.escape
description: escape called with a RegExp object from another realm
features: [RegExp.escape, cross-realm]
---*/

// Take RegExp.escape from a freshly created realm and invoke it with the
// current realm's RegExp constructor as the receiver.
const input = "oi+hello";
const { global: otherGlobal } = $262.createRealm();
const otherEscape = otherGlobal.RegExp.escape;

assert.sameValue(typeof otherEscape, "function", "other.RegExp.escape is a function");

// The foreign function must produce exactly what the local one produces.
const actual = otherEscape.call(RegExp, input);
const expected = RegExp.escape(input);

assert.sameValue(actual, expected, "cross-realm escape works correctly");
28 changes: 28 additions & 0 deletions test/built-ins/RegExp/escape/escaped-code-points.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-encodeforregexpescape
description: Encodes code points greater than 0xFF
info: |
EncodeForRegExpEscape ( c )
3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
4. Let toEscape be StringToCodePoints(otherPunctuators).
5. If toEscape contains c, c is matched by WhiteSpace or LineTerminator, or c has the same numeric value as a leading surrogate or trailing surrogate, then
a. If c ≤ 0xFF, then
i. Let hex be Number::toString(𝔽(c), 16).
ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and StringPad(hex, 2, "0", START).
b. Let escaped be the empty String.
c. Let codeUnits be UTF16EncodeCodePoint(c).
d. For each code unit cu of codeUnits, do
i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
e. Return escaped.
6. Return UTF16EncodeCodePoint(c).
features: [RegExp.escape]
---*/

// Code points above 0xFF that are neither WhiteSpace, LineTerminator nor
// surrogates do not satisfy step 5, so step 6 returns them unescaped.
const codePoints = String.fromCharCode(0x100, 0x200, 0x300);

assert.sameValue(RegExp.escape(codePoints), codePoints, 'Code points > 0xFF outside the escaped set are returned unchanged');

// Code points above 0xFF that do satisfy step 5 are encoded one code unit at a
// time with UnicodeEscape (lowercase hex): U+1680 and U+FEFF are WhiteSpace,
// U+2028 is a LineTerminator, and the trailing 0xD800 is a lone leading
// surrogate (kept last so it cannot pair with a following code unit).
const escapedCodePoints = String.fromCharCode(0x1680, 0x2028, 0xFEFF, 0xD800);
const expectedEscapedCodePoints = '\\u1680\\u2028\\ufeff\\ud800';

assert.sameValue(RegExp.escape(escapedCodePoints), expectedEscapedCodePoints, 'Code points > 0xFF are correctly escaped');
25 changes: 25 additions & 0 deletions test/built-ins/RegExp/escape/escaped-control-characters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-encodeforregexpescape
description: Encodes control characters with their ControlEscape sequences
info: |
EncodeForRegExpEscape ( c )
2. If c is the code point listed in some cell of the “Code Point” column of Table 64, then
a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and the string in the “ControlEscape” column of the row whose “Code Point” column contains c.
ControlEscape, Numeric Value, Code Point, Unicode Name, Symbol
t 9 U+0009 CHARACTER TABULATION <HT>
n 10 U+000A LINE FEED (LF) <LF>
v 11 U+000B LINE TABULATION <VT>
f 12 U+000C FORM FEED (FF) <FF>
r 13 U+000D CARRIAGE RETURN (CR) <CR>
features: [RegExp.escape]
---*/

// Only the five Table 64 code points (HT, LF, VT, FF, CR) have a ControlEscape.
const controlCharacters = '\t\n\v\f\r';
const expectedEscapedCharacters = '\\t\\n\\v\\f\\r';

assert.sameValue(RegExp.escape(controlCharacters), expectedEscapedCharacters, 'Control characters are correctly escaped');

// U+0008 (BACKSPACE) is not listed in Table 64 and is neither WhiteSpace nor a
// LineTerminator, so it reaches step 6 and is returned as-is (no "\b" escape).
assert.sameValue(RegExp.escape('\b'), '\b', 'U+0008 (BACKSPACE) has no ControlEscape and is not escaped');
48 changes: 48 additions & 0 deletions test/built-ins/RegExp/escape/escaped-lineterminator.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-regexp.escape
description: Escaped lineterminator characters (simple assertions)
info: |
EncodeForRegExpEscape ( c )
...
3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
4. Let toEscape be StringToCodePoints(otherPunctuators).
5. If toEscape ..., c is matched by WhiteSpace or LineTerminator, ..., then
a. If c ≤ 0xFF, then
i. Let hex be Number::toString(𝔽(c), 16).
ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and StringPad(hex, 2, "0", START).
b. Let escaped be the empty String.
c. Let codeUnits be UTF16EncodeCodePoint(c).
d. For each code unit cu of codeUnits, do
i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
e. Return escaped.
6. Return UTF16EncodeCodePoint(c).
LineTerminator ::
<LF>
<CR>
<LS>
<PS>
Exceptions:
2. If c is the code point listed in some cell of the “Code Point” column of Table 64, then
a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and the string in the “ControlEscape” column of the row whose “Code Point” column contains c.
ControlEscape, Numeric Value, Code Point, Unicode Name, Symbol
t 9 U+0009 CHARACTER TABULATION <HT>
n 10 U+000A LINE FEED (LF) <LF>
v 11 U+000B LINE TABULATION <VT>
f 12 U+000C FORM FEED (FF) <FF>
r 13 U+000D CARRIAGE RETURN (CR) <CR>
features: [RegExp.escape]
---*/

// Table of [input, expected output, assertion message]. <LS> (U+2028) and
// <PS> (U+2029) are above 0xFF, so they are expected as \uXXXX sequences.
const lineTerminatorCases = [
  ['\u2028', '\\u2028', 'line terminator \\u2028 is escaped correctly to \\u2028'],
  ['\u2029', '\\u2029', 'line terminator \\u2029 is escaped correctly to \\u2029'],
  ['\u2028\u2029', '\\u2028\\u2029', 'line terminators are escaped correctly'],
  ['\u2028a\u2029a', '\\u2028a\\u2029a', 'mixed line terminators are escaped correctly'],
];

for (const [input, expected, message] of lineTerminatorCases) {
  assert.sameValue(RegExp.escape(input), expected, message);
}
58 changes: 58 additions & 0 deletions test/built-ins/RegExp/escape/escaped-otherpuntuactors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-regexp.escape
description: Escaped other punctuators characters
info: |
EncodeForRegExpEscape ( c )
...
3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
4. Let toEscape be StringToCodePoints(otherPunctuators).
5. If toEscape contains c, (...), then
a. If c ≤ 0xFF, then
i. Let hex be Number::toString(𝔽(c), 16).
ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and StringPad(hex, 2, "0", START).
b. Let escaped be the empty String.
c. Let codeUnits be UTF16EncodeCodePoint(c).
d. For each code unit cu of codeUnits, do
i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
e. Return escaped.
6. Return UTF16EncodeCodePoint(c).
codePoints
0x002c ,
0x002d -
0x003d =
0x003c <
0x003e >
0x0023 #
0x0026 &
0x0021 !
0x0025 %
0x003a :
0x003b ;
0x0040 @
0x007e ~
0x0027 '
0x0060 `
0x0022 "
features: [RegExp.escape]
---*/

const otherPunctuators = ',-=<>#&!%:;@~\'`"';

// Escaped individually, each punctuator must become "\x" followed by the
// hexadecimal form of its code point.
[...otherPunctuators].forEach((punctuator) => {
  const hex = punctuator.codePointAt(0).toString(16);
  assert.sameValue(RegExp.escape(punctuator), '\\x' + hex, `${punctuator} is escaped correctly`);
});

// Escaping the whole list in one call yields the individual escapes, in order.
const otherPunctuatorsExpected = '\\x2c\\x2d\\x3d\\x3c\\x3e\\x23\\x26\\x21\\x25\\x3a\\x3b\\x40\\x7e\\x27\\x60\\x22';

assert.sameValue(RegExp.escape(otherPunctuators), otherPunctuatorsExpected, 'all other punctuators are escaped correctly');
17 changes: 17 additions & 0 deletions test/built-ins/RegExp/escape/escaped-solidus-character-mixed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-regexp.escape
description: Escaped U+002F (SOLIDUS) characters (mixed assertions)
info: |
EncodeForRegExpEscape ( c )
1. If c is matched by SyntaxCharacter or c is U+002F (SOLIDUS), then
a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and UTF16EncodeCodePoint(c).
features: [RegExp.escape]
---*/

// Solidus mixed with other SyntaxCharacters: each one gets a single leading backslash.
assert.sameValue(RegExp.escape('.a/b'), '\\.a\\/b', 'mixed string with solidus character is escaped correctly');
assert.sameValue(RegExp.escape('/./'), '\\/\\.\\/', 'solidus character is escaped correctly - regexp similar');
// The input contains a REVERSE SOLIDUS immediately followed by a SOLIDUS; both
// are escaped independently, giving `\\` followed by `\/` in the output.
assert.sameValue(RegExp.escape('./a\\/*b+c?d^e$f|g{2}h[i]j\\k'), '\\.\\/a\\\\\\/\\*b\\+c\\?d\\^e\\$f\\|g\\{2\\}h\\[i\\]j\\\\k', 'complex string with multiple special characters is escaped correctly');
18 changes: 18 additions & 0 deletions test/built-ins/RegExp/escape/escaped-solidus-character-simple.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (C) 2024 Leo Balter. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.

/*---
esid: sec-regexp.escape
description: Escaped U+002F (SOLIDUS) character (simple assertions)
info: |
EncodeForRegExpEscape ( c )
1. If c is matched by SyntaxCharacter or c is U+002F (SOLIDUS), then
a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and UTF16EncodeCodePoint(c).
features: [RegExp.escape]
---*/

// Inputs made only of U+002F (SOLIDUS): every occurrence is expected to be
// prefixed with a REVERSE SOLIDUS.
const solidusCases = [
  { input: '/', expected: '\\/', message: 'solidus character is escaped correctly' },
  { input: '//', expected: '\\/\\/', message: 'solidus character is escaped correctly - multiple occurrences 1' },
  { input: '///', expected: '\\/\\/\\/', message: 'solidus character is escaped correctly - multiple occurrences 2' },
  { input: '////', expected: '\\/\\/\\/\\/', message: 'solidus character is escaped correctly - multiple occurrences 3' },
];

for (const { input, expected, message } of solidusCases) {
  assert.sameValue(RegExp.escape(input), expected, message);
}
Loading

0 comments on commit a74928b

Please sign in to comment.