Skip to content

Commit

Permalink
Fix unicode escapes in jsx identifiers and extended unicode characters in jsdoc (#32716)
Browse files Browse the repository at this point in the history

* Fix unicode escapes in jsx identifiers and extended unicode characters in jsdoc

* Support unicode escapes in JSDoc

* Add tests for extended escapes
  • Loading branch information
weswigham authored Aug 6, 2019
1 parent 480b739 commit f333684
Show file tree
Hide file tree
Showing 20 changed files with 455 additions and 13 deletions.
6 changes: 5 additions & 1 deletion src/compiler/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7305,10 +7305,14 @@ namespace ts {
return createMissingNode<Identifier>(SyntaxKind.Identifier, /*reportAtCurrentPosition*/ !message, message || Diagnostics.Identifier_expected);
}

identifierCount++;
const pos = scanner.getTokenPos();
const end = scanner.getTextPos();
const result = <Identifier>createNode(SyntaxKind.Identifier, pos);
result.escapedText = escapeLeadingUnderscores(scanner.getTokenText());
if (token() !== SyntaxKind.Identifier) {
result.originalKeywordKind = token();
}
result.escapedText = escapeLeadingUnderscores(internIdentifier(scanner.getTokenValue()));
finishNode(result, end);

nextTokenJSDoc();
Expand Down
50 changes: 38 additions & 12 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ namespace ts {
}

function checkForIdentifierStartAfterNumericLiteral(numericStart: number, isScientific?: boolean) {
if (!isIdentifierStart(text.charCodeAt(pos), languageVersion)) {
if (!isIdentifierStart(codePointAt(text, pos), languageVersion)) {
return;
}

Expand Down Expand Up @@ -2063,17 +2063,22 @@ namespace ts {
// they allow dashes
function scanJsxIdentifier(): SyntaxKind {
if (tokenIsIdentifierOrKeyword(token)) {
const firstCharPosition = pos;
// An identifier or keyword has already been parsed - check for a `-` and then append it and everything after it to the token
// Do note that this means that `scanJsxIdentifier` effectively _mutates_ the visible token without advancing to a new token
// Any caller should be expecting this behavior and should only read the pos or token value after calling it.
while (pos < end) {
const ch = text.charCodeAt(pos);
if (ch === CharacterCodes.minus || ((firstCharPosition === pos) ? isIdentifierStart(ch, languageVersion) : isIdentifierPart(ch, languageVersion))) {
if (ch === CharacterCodes.minus) {
tokenValue += "-";
pos++;
continue;
}
else {
const oldPos = pos;
tokenValue += scanIdentifierParts(); // reuse `scanIdentifierParts` so unicode escapes are handled
if (pos === oldPos) {
break;
}
}
tokenValue += text.substring(firstCharPosition, pos);
}
return token;
}
Expand All @@ -2099,8 +2104,8 @@ namespace ts {
return token = SyntaxKind.EndOfFileToken;
}

const ch = text.charCodeAt(pos);
pos++;
const ch = codePointAt(text, pos);
pos += charSize(ch);
switch (ch) {
case CharacterCodes.tab:
case CharacterCodes.verticalTab:
Expand Down Expand Up @@ -2138,13 +2143,34 @@ namespace ts {
return token = SyntaxKind.DotToken;
case CharacterCodes.backtick:
return token = SyntaxKind.BacktickToken;
}
case CharacterCodes.backslash:
pos--;
const extendedCookedChar = peekExtendedUnicodeEscape();
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
pos += 3;
tokenFlags |= TokenFlags.ExtendedUnicodeEscape;
tokenValue = scanExtendedUnicodeEscape() + scanIdentifierParts();
return token = getIdentifierToken();
}

if (isIdentifierStart(ch, ScriptTarget.Latest)) {
while (isIdentifierPart(text.charCodeAt(pos), ScriptTarget.Latest) && pos < end) {
const cookedChar = peekUnicodeEscape();
if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) {
pos += 6;
tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts();
return token = getIdentifierToken();
}
error(Diagnostics.Invalid_character);
pos++;
}
return token = SyntaxKind.Unknown;
}

if (isIdentifierStart(ch, languageVersion)) {
let char = ch;
while (pos < end && isIdentifierPart(char = codePointAt(text, pos), languageVersion)) pos += charSize(char);
tokenValue = text.substring(tokenPos, pos);
if (char === CharacterCodes.backslash) {
tokenValue += scanIdentifierParts();
}
return token = getIdentifierToken();
}
else {
Expand Down Expand Up @@ -2265,7 +2291,7 @@ namespace ts {

/* @internal */
function charSize(ch: number) {
if (ch > 0x10000) {
if (ch >= 0x10000) {
return 2;
}
return 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 13,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "TypeKeyword",
"escapedText": "type"
},
"typeExpression": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 13,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "TypeKeyword",
"escapedText": "type"
},
"typeExpression": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 15,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "ReturnKeyword",
"escapedText": "return"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 15,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "ReturnKeyword",
"escapedText": "return"
},
"typeExpression": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 15,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "ReturnKeyword",
"escapedText": "return"
},
"typeExpression": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"end": 13,
"modifierFlagsCache": 0,
"transformFlags": 0,
"originalKeywordKind": "TypeKeyword",
"escapedText": "type"
},
"typeExpression": {
Expand Down
19 changes: 19 additions & 0 deletions tests/baselines/reference/extendedUnicodePlaneIdentifiersJSDoc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//// [file.js]
/**
* Adds
* @param {number} 𝑚
* @param {number} 𝑀
*/
function foo(𝑚, 𝑀) {
console.log(𝑀 + 𝑚);
}

//// [file.js]
/**
* Adds
* @param {number} 𝑚
* @param {number} 𝑀
*/
function foo(𝑚, 𝑀) {
console.log(𝑀 + 𝑚);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
=== tests/cases/compiler/file.js ===
/**
* Adds
* @param {number} 𝑚
* @param {number} 𝑀
*/
function foo(𝑚, 𝑀) {
>foo : Symbol(foo, Decl(file.js, 0, 0))
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))

console.log(𝑀 + 𝑚);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
=== tests/cases/compiler/file.js ===
/**
* Adds
* @param {number} 𝑚
* @param {number} 𝑀
*/
function foo(𝑚, 𝑀) {
>foo : (𝑚: number, 𝑀: number) => void
>𝑚 : number
>𝑀 : number

console.log(𝑀 + 𝑚);
>console.log(𝑀 + 𝑚) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>𝑀 + 𝑚 : number
>𝑀 : number
>𝑚 : number
}
33 changes: 33 additions & 0 deletions tests/baselines/reference/unicodeEscapesInJSDoc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//// [file.js]
/**
* @param {number} \u0061
* @param {number} a\u0061
*/
function foo(a, aa) {
console.log(a + aa);
}

/**
* @param {number} \u{0061}
* @param {number} a\u{0061}
*/
function bar(a, aa) {
console.log(a + aa);
}


//// [file.js]
/**
* @param {number} \u0061
* @param {number} a\u0061
*/
function foo(a, aa) {
console.log(a + aa);
}
/**
* @param {number} \u{0061}
* @param {number} a\u{0061}
*/
function bar(a, aa) {
console.log(a + aa);
}
35 changes: 35 additions & 0 deletions tests/baselines/reference/unicodeEscapesInJSDoc.symbols
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
=== tests/cases/compiler/file.js ===
/**
* @param {number} \u0061
* @param {number} a\u0061
*/
function foo(a, aa) {
>foo : Symbol(foo, Decl(file.js, 0, 0))
>a : Symbol(a, Decl(file.js, 4, 13))
>aa : Symbol(aa, Decl(file.js, 4, 15))

console.log(a + aa);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>a : Symbol(a, Decl(file.js, 4, 13))
>aa : Symbol(aa, Decl(file.js, 4, 15))
}

/**
* @param {number} \u{0061}
* @param {number} a\u{0061}
*/
function bar(a, aa) {
>bar : Symbol(bar, Decl(file.js, 6, 1))
>a : Symbol(a, Decl(file.js, 12, 13))
>aa : Symbol(aa, Decl(file.js, 12, 15))

console.log(a + aa);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>a : Symbol(a, Decl(file.js, 12, 13))
>aa : Symbol(aa, Decl(file.js, 12, 15))
}

39 changes: 39 additions & 0 deletions tests/baselines/reference/unicodeEscapesInJSDoc.types
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
=== tests/cases/compiler/file.js ===
/**
* @param {number} \u0061
* @param {number} a\u0061
*/
function foo(a, aa) {
>foo : (a: number, aa: number) => void
>a : number
>aa : number

console.log(a + aa);
>console.log(a + aa) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>a + aa : number
>a : number
>aa : number
}

/**
* @param {number} \u{0061}
* @param {number} a\u{0061}
*/
function bar(a, aa) {
>bar : (a: number, aa: number) => void
>a : number
>aa : number

console.log(a + aa);
>console.log(a + aa) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>a + aa : number
>a : number
>aa : number
}

34 changes: 34 additions & 0 deletions tests/baselines/reference/unicodeEscapesInJsxtags.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//// [file.tsx]
import * as React from "react";
declare global {
namespace JSX {
interface IntrinsicElements {
"a-b": any;
"a-c": any;
}
}
}
const Compa = (x: {x: number}) => <div>{"" + x}</div>;

let a = <\u0061></a>; // works
let ab = <\u0061-b></a-b>; // works
let ac = <a-\u0063></a-c>; // works
let compa = <Comp\u0061 x={12} />; // works

let a2 = <\u{0061}></a>; // works
let ab2 = <\u{0061}-b></a-b>; // works
let ac2 = <a-\u{0063}></a-c>; // works
let compa2 = <Comp\u{0061} x={12} />; // works


//// [file.js]
import * as React from "react";
const Compa = (x) => React.createElement("div", null, "" + x);
let a = React.createElement("a", null); // works
let ab = React.createElement("a-b", null); // works
let ac = React.createElement("a-c", null); // works
let compa = React.createElement(Comp\u0061, { x: 12 }); // works
let a2 = React.createElement("a", null); // works
let ab2 = React.createElement("a-b", null); // works
let ac2 = React.createElement("a-c", null); // works
let compa2 = React.createElement(Comp\u{0061}, { x: 12 }); // works
Loading

0 comments on commit f333684

Please sign in to comment.