Skip to content

Commit acdbd10

Browse files
authored
Amend scanner to support astral characters in identifiers when parsing es6+ (microsoft#32096)
* Amend scanner to support astral characters in identifiers when parsing es6+
* Use charSize helper rather than one-off maybe advance helper
* Update script to emit informative comment, run in unicode 12.1 environment
* Add suggested change
1 parent bf903eb commit acdbd10

6 files changed

+126
-13
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
2+
// Regenerates the `unicodeESNextIdentifierStart` / `unicodeESNextIdentifierPart`
// range tables and prints them to stdout as source text.
//
// Each table is a flat array of inclusive [first, last] code point pairs, so it
// must always have even length.

/** Highest valid Unicode code point; the scan below includes it. */
const MAX_UNICODE_CODEPOINT = 0x10FFFF;

// Hoisted so the regex objects are built once rather than on every call.
// Neither uses the `g`/`y` flag, so `.test` carries no state between calls.
const ID_START_PATTERN = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
const ID_CONTINUE_PATTERN = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;

/** True when the single-code-point string `c` may start an identifier. */
const isStart = c => ID_START_PATTERN.test(c); // Other_ID_Start explicitly included for back compat - see http://www.unicode.org/reports/tr31/#Introduction
/** True when the single-code-point string `c` may appear after the first character of an identifier. */
const isPart = c => ID_CONTINUE_PATTERN.test(c) || isStart(c); // Likewise for Other_ID_Continue

/**
 * Scans code points 0..maxCodePoint (inclusive) and returns the maximal runs
 * for which `predicate` holds, flattened as [first0, last0, first1, last1, ...].
 *
 * @param predicate Called with a one-code-point string; returns a boolean.
 * @param maxCodePoint Last code point to test (inclusive).
 * @returns Flat array of inclusive range bounds; always of even length.
 */
function collectRanges(predicate, maxCodePoint) {
    const bounds = [];
    let insideRun = false;
    for (let codePoint = 0; codePoint <= maxCodePoint; codePoint++) {
        const matches = predicate(String.fromCodePoint(codePoint));
        if (matches !== insideRun) {
            // Entering a run records this code point; leaving one records the
            // previous code point, which was the last member of the run.
            bounds.push(insideRun ? codePoint - 1 : codePoint);
            insideRun = matches;
        }
    }
    if (insideRun) {
        // A run reaching the end of the scan still needs its closing bound,
        // otherwise the table would have a dangling start entry.
        bounds.push(maxCodePoint);
    }
    return bounds;
}

const starts = collectRanges(isStart, MAX_UNICODE_CODEPOINT);
const parts = collectRanges(isPart, MAX_UNICODE_CODEPOINT);

console.log(`/**
 * Generated by scripts/regenerate-unicode-identifier-parts.js on node ${process.version} with unicode ${process.versions.unicode}
 * based on http://www.unicode.org/reports/tr31/ and https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
 * unicodeESNextIdentifierStart corresponds to the ID_Start and Other_ID_Start property, and
 * unicodeESNextIdentifierPart corresponds to ID_Continue, Other_ID_Continue, plus ID_Start and Other_ID_Start
 */`);
console.log(`const unicodeESNextIdentifierStart = [${starts.join(", ")}];`);
console.log(`const unicodeESNextIdentifierPart = [${parts.join(", ")}];`);

src/compiler/scanner.ts

+52-13
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
//// [extendedUnicodePlaneIdentifiers.ts]
2+
const 𝑚 = 4;
3+
const 𝑀 = 5;
4+
console.log(𝑀 + 𝑚); // 9
5+
6+
7+
//// [extendedUnicodePlaneIdentifiers.js]
8+
const 𝑚 = 4;
9+
const 𝑀 = 5;
10+
console.log(𝑀 + 𝑚); // 9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
=== tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts ===
2+
const 𝑚 = 4;
3+
>𝑚 : Symbol(𝑚, Decl(extendedUnicodePlaneIdentifiers.ts, 0, 5))
4+
5+
const 𝑀 = 5;
6+
>𝑀 : Symbol(𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 1, 5))
7+
8+
console.log(𝑀 + 𝑚); // 9
9+
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
10+
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
11+
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
12+
>𝑀 : Symbol(𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 1, 5))
13+
>𝑚 : Symbol(𝑚, Decl(extendedUnicodePlaneIdentifiers.ts, 0, 5))
14+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
=== tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts ===
2+
const 𝑚 = 4;
3+
>𝑚 : 4
4+
>4 : 4
5+
6+
const 𝑀 = 5;
7+
>𝑀 : 5
8+
>5 : 5
9+
10+
console.log(𝑀 + 𝑚); // 9
11+
>console.log(𝑀 + 𝑚) : void
12+
>console.log : (message?: any, ...optionalParams: any[]) => void
13+
>console : Console
14+
>log : (message?: any, ...optionalParams: any[]) => void
15+
>𝑀 + 𝑚 : number
16+
>𝑀 : 5
17+
>𝑚 : 4
18+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// @target: es2018
2+
const 𝑚 = 4;
3+
const 𝑀 = 5;
4+
console.log(𝑀 + 𝑚); // 9

0 commit comments

Comments
 (0)