microsoft · DanielRosenwasser · Mar 3, 2015 · Feb 25, 2015 · Feb 25, 2015 · Feb 26, 2015
diff --git a/src/compiler/core.ts b/src/compiler/core.ts
@@ -623,29 +623,6 @@ module ts {
         "\u0085": "\\u0085"  // nextLine
     };
 
-    /**
-     * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2),
-     * but augmented for a few select characters.
-     * Note that this doesn't actually wrap the input in double quotes.
-     */
-    export function escapeString(s: string): string {
-        // Prioritize '"' and '\'
-        s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s;
-        s = escapedCharsRegExp.test(s) ? s.replace(escapedCharsRegExp, getReplacement) : s;
-
-        return s;
-
-        function getReplacement(c: string) {
-            return escapedCharsMap[c] || unicodeEscape(c);
-        }
-
-        function unicodeEscape(c: string): string {
-            var hexCharCode = c.charCodeAt(0).toString(16);
-            var paddedHexCode = ("0000" + hexCharCode).slice(-4);
-            return "\\u" + paddedHexCode;
-        }
-    }
-
     export function getDefaultLibFileName(options: CompilerOptions): string {
         return options.target === ScriptTarget.ES6 ? "lib.es6.d.ts" : "lib.d.ts";
     }

diff --git a/src/compiler/diagnosticInformationMap.generated.ts b/src/compiler/diagnosticInformationMap.generated.ts
@@ -155,6 +155,8 @@ module ts {
         Catch_clause_variable_name_must_be_an_identifier: { code: 1195, category: DiagnosticCategory.Error, key: "Catch clause variable name must be an identifier." },
         Catch_clause_variable_cannot_have_a_type_annotation: { code: 1196, category: DiagnosticCategory.Error, key: "Catch clause variable cannot have a type annotation." },
         Catch_clause_variable_cannot_have_an_initializer: { code: 1197, category: DiagnosticCategory.Error, key: "Catch clause variable cannot have an initializer." },
+        An_extended_Unicode_escape_value_must_be_between_0x0_and_0x10FFFF_inclusive: { code: 1198, category: DiagnosticCategory.Error, key: "An extended Unicode escape value must be between 0x0 and 0x10FFFF inclusive." },
+        expected: { code: 1199, category: DiagnosticCategory.Error, key: "'}' expected." },
         Duplicate_identifier_0: { code: 2300, category: DiagnosticCategory.Error, key: "Duplicate identifier '{0}'." },
         Initializer_of_instance_member_variable_0_cannot_reference_identifier_1_declared_in_the_constructor: { code: 2301, category: DiagnosticCategory.Error, key: "Initializer of instance member variable '{0}' cannot reference identifier '{1}' declared in the constructor." },
         Static_members_cannot_reference_class_type_parameters: { code: 2302, category: DiagnosticCategory.Error, key: "Static members cannot reference class type parameters." },

diff --git a/src/compiler/diagnosticMessages.json b/src/compiler/diagnosticMessages.json
@@ -611,7 +611,14 @@
         "category": "Error",
         "code": 1197
     },
-
+    "An extended Unicode escape value must be between 0x0 and 0x10FFFF inclusive.": {
+        "category": "Error",
+        "code": 1198
+    },
+    "'}' expected.": {
+        "category": "Error",
+        "code": 1199
+    },
     "Duplicate identifier '{0}'.": {
         "category": "Error",
         "code": 2300

diff --git a/src/compiler/emitter.ts b/src/compiler/emitter.ts
@@ -2191,9 +2191,12 @@ module ts {
             }
 
             function emitLiteral(node: LiteralExpression) {
-                var text = languageVersion < ScriptTarget.ES6 && isTemplateLiteralKind(node.kind) ? getTemplateLiteralAsStringLiteral(node) :
-                    node.parent ? getSourceTextOfNodeFromSourceFile(currentSourceFile, node) :
-                        node.text;
+                var text = languageVersion < ScriptTarget.ES6 && (isTemplateLiteralKind(node.kind) || node.hasExtendedUnicodeEscape)
+                    ? getDoubleQuotedStringTextOfLiteral(node)
+                    : node.parent
+                        ? getSourceTextOfNodeFromSourceFile(currentSourceFile, node)
+                        : node.text; // TODO(drosen): Is this correct?
+
                 if (compilerOptions.sourceMap && (node.kind === SyntaxKind.StringLiteral || isTemplateLiteralKind(node.kind))) {
                     writer.writeLiteral(text);
                 }
@@ -2205,9 +2208,12 @@ module ts {
                     write(text);
                 }
             }
-
-            function getTemplateLiteralAsStringLiteral(node: LiteralExpression): string {
-                return '"' + escapeString(node.text) + '"';
+
+            /**
+             * Builds a double-quoted string literal from `node.text`: the text is passed through
+             * `escapeString`, then through `replaceNonAsciiCharacters`, and finally wrapped in '"'.
+             */
+            function getDoubleQuotedStringTextOfLiteral(node: LiteralExpression): string {
+                var escapedText = replaceNonAsciiCharacters(escapeString(node.text));
+                return '"' + escapedText + '"';
             }
 
             function emitDownlevelRawTemplateLiteral(node: LiteralExpression) {

diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts
@@ -2163,6 +2163,10 @@ module ts {
             var text = scanner.getTokenValue();
             node.text = internName ? internIdentifier(text) : text;
 
+            if (scanner.hasExtendedUnicodeEscape()) {
+                node.hasExtendedUnicodeEscape = true;
+            }
+
             if (scanner.isUnterminated()) {
                 node.isUnterminated = true;
             }

diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -14,6 +14,7 @@ module ts {
         getTokenPos(): number;
         getTokenText(): string;
         getTokenValue(): string;
+        hasExtendedUnicodeEscape(): boolean;
         hasPrecedingLineBreak(): boolean;
         isIdentifier(): boolean;
         isReservedWord(): boolean;
@@ -556,6 +557,7 @@ module ts {
         var token: SyntaxKind;
         var tokenValue: string;
         var precedingLineBreak: boolean;
+        var hasExtendedUnicodeEscape: boolean;
         var tokenIsUnterminated: boolean;
 
         function error(message: DiagnosticMessage, length?: number): void {
@@ -606,11 +608,21 @@ module ts {
             }
             return +(text.substring(start, pos));
         }
+
+        // Scans hex digits with both the minimum and the maximum digit count set to 'count'.
+        function scanExactNumberOfHexDigits(count: number): number {
+            var requiredDigits = count;
+            return scanHexDigits(/*minCount*/ requiredDigits, /*maxCount*/ requiredDigits);
+        }
+
+        // Scans hex digits requiring at least 'count' of them; 'maxCount' is left
+        // undefined, so no upper bound is placed on the number of digits.
+        function scanMinimumNumberOfHexDigits(count: number): number {
+            return scanHexDigits(/*minCount*/ count);
+        }
 
-        function scanHexDigits(count: number, mustMatchCount?: boolean): number {
+        function scanHexDigits(minCount: number, maxCount?: number): number {
+            var maxCountSpecified = maxCount !== undefined;
+
             var digits = 0;
             var value = 0;
-            while (digits < count || !mustMatchCount) {
+            while (!maxCountSpecified || digits < maxCount) {
                 var ch = text.charCodeAt(pos);
                 if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
                     value = value * 16 + ch - CharacterCodes._0;
@@ -627,7 +639,7 @@ module ts {
                 pos++;
                 digits++;
             }
-            if (digits < count) {
+            if (digits < minCount) {
                 value = -1;
             }
             return value;
@@ -764,11 +776,18 @@ module ts {
                     return "\'";
                 case CharacterCodes.doubleQuote:
                     return "\"";
-                case CharacterCodes.x:
                 case CharacterCodes.u:
-                    var ch = scanHexDigits(ch === CharacterCodes.x ? 2 : 4, /*mustMatchCount*/ true);
-                    if (ch >= 0) {
-                        return String.fromCharCode(ch);
+                    if (pos < len && text.charCodeAt(pos) === CharacterCodes.openBrace) {
+                        hasExtendedUnicodeEscape = true;
+                        pos++;
+                        return scanExtendedUnicodeEscape();
+                    }
+
+                    // fall through
+                case CharacterCodes.x:
+                    var escapedValue = scanExactNumberOfHexDigits(ch === CharacterCodes.x ? 2 : 4);
+                    if (escapedValue >= 0) {
+                        return String.fromCharCode(escapedValue);
                     }
                     else {
                         error(Diagnostics.Hexadecimal_digit_expected);
@@ -790,14 +809,62 @@ module ts {
                     return String.fromCharCode(ch);
             }
         }
+
+        /**
+         * Scans the remainder of an extended Unicode escape of the form '\u{X...}'.
+         * The caller has already consumed the opening '{'.
+         *
+         * An error is reported when no hex digit is present, when the scanned value is
+         * greater than 0x10FFFF, when the text ends before the escape is closed, or when
+         * the character after the digits is not '}'.
+         *
+         * Returns the scanned code point encoded as a string of UTF-16 code units, or ""
+         * if any error was reported.
+         */
+        function scanExtendedUnicodeEscape(): string {
+            var escapedValue = scanMinimumNumberOfHexDigits(1);
+            var isInvalidExtendedEscape = false;
+
+            // Validate the scanned value; a negative value means no hex digit was found.
+            if (escapedValue < 0) {
+                error(Diagnostics.Hexadecimal_digit_expected);
+                isInvalidExtendedEscape = true;
+            }
+            else if (escapedValue > 0x10FFFF) {
+                error(Diagnostics.An_extended_Unicode_escape_value_must_be_between_0x0_and_0x10FFFF_inclusive);
+                isInvalidExtendedEscape = true;
+            }
+
+            if (pos >= len) {
+                error(Diagnostics.Unexpected_end_of_text);
+                isInvalidExtendedEscape = true;
+            }
+            else if (text.charCodeAt(pos) === CharacterCodes.closeBrace) {
+                // Only swallow the following character up if it's a '}'.
+                pos++;
+            }
+            else {
+                error(Diagnostics.expected); // '}' expected.
+                isInvalidExtendedEscape = true;
+            }
+
+            if (isInvalidExtendedEscape) {
+                return "";
+            }
+
+            return utf16EncodeAsString(escapedValue);
+        }
+
+        // Encodes a code point as a string of one or two UTF-16 code units.
+        // Derived from the 10.1.1 UTF16Encoding of the ES6 Spec.
+        function utf16EncodeAsString(codePoint: number): string {
+            Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);
+
+            // Values up to 0xFFFF fit in a single code unit.
+            if (codePoint <= 0xFFFF) {
+                return String.fromCharCode(codePoint);
+            }
+
+            // Otherwise split the offset past 0x10000 into a surrogate pair.
+            var offset = codePoint - 0x10000;
+            var leadSurrogate = Math.floor(offset / 0x400) + 0xD800;
+            var trailSurrogate = (offset % 0x400) + 0xDC00;
+
+            return String.fromCharCode(leadSurrogate, trailSurrogate);
+        }
 
         // Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX'
         // and return code point value if valid Unicode escape is found. Otherwise return -1.
         function peekUnicodeEscape(): number {
             if (pos + 5 < len && text.charCodeAt(pos + 1) === CharacterCodes.u) {
                 var start = pos;
                 pos += 2;
-                var value = scanHexDigits(4, /*mustMatchCount*/ true);
+                var value = scanExactNumberOfHexDigits(4);
                 pos = start;
                 return value;
             }
@@ -869,6 +936,7 @@ module ts {
 
         function scan(): SyntaxKind {
             startPos = pos;
+            hasExtendedUnicodeEscape = false;
             precedingLineBreak = false;
             tokenIsUnterminated = false;
             while (true) {
@@ -1034,7 +1102,7 @@ module ts {
                     case CharacterCodes._0:
                         if (pos + 2 < len && (text.charCodeAt(pos + 1) === CharacterCodes.X || text.charCodeAt(pos + 1) === CharacterCodes.x)) {
                             pos += 2;
-                            var value = scanHexDigits(1, /*mustMatchCount*/ false);
+                            var value = scanMinimumNumberOfHexDigits(1);
                             if (value < 0) {
                                 error(Diagnostics.Hexadecimal_digit_expected);
                                 value = 0;
@@ -1336,6 +1404,7 @@ module ts {
             getTokenPos: () => tokenPos,
             getTokenText: () => text.substring(tokenPos, pos),
             getTokenValue: () => tokenValue,
+            hasExtendedUnicodeEscape: () => hasExtendedUnicodeEscape,
             hasPrecedingLineBreak: () => precedingLineBreak,
             isIdentifier: () => token === SyntaxKind.Identifier || token > SyntaxKind.LastReservedWord,
             isReservedWord: () => token >= SyntaxKind.FirstReservedWord && token <= SyntaxKind.LastReservedWord,

diff --git a/src/compiler/types.ts b/src/compiler/types.ts
@@ -655,6 +655,7 @@ module ts {
     export interface LiteralExpression extends PrimaryExpression {
         text: string;
         isUnterminated?: boolean;
+        hasExtendedUnicodeEscape?: boolean;
     }
 
     export interface StringLiteralExpression extends LiteralExpression {

diff --git a/src/compiler/utilities.ts b/src/compiler/utilities.ts
@@ -1130,7 +1130,7 @@ module ts {
             newEndN = Math.max(newEnd2, newEnd2 + (newEnd1 - oldEnd2));
         }
 
-        return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */newEndN - oldStartN);
+        return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */ newEndN - oldStartN);
     }
 
     // @internal
@@ -1212,4 +1212,53 @@ module ts {
             }
         }
     }
+
+    // Matches every double quote and backslash (global, so 'replace' rewrites all occurrences).
+    var backslashOrDoubleQuote = /[\"\\]/g;
+    // Matches every character in U+0000-U+001F, plus U+2028, U+2029 and U+0085.
+    // The explicitly listed \t \v \f \b \r \n already fall inside the U+0000-U+001F range
+    // ('\b' inside a character class is the backspace character).
+    var escapedCharsRegExp = /[\u0000-\u001f\t\v\f\b\r\n\u2028\u2029\u0085]/g;
+    // Replacement text for characters that have a dedicated escape. 'escapeString' falls back
+    // to a '\uNNNN' escape for matched characters that have no entry here.
+    var escapedCharsMap: Map<string> = {
+        "\0": "\\0",
+        "\t": "\\t",
+        "\v": "\\v",
+        "\f": "\\f",
+        "\b": "\\b",
+        "\r": "\\r",
+        "\n": "\\n",
+        "\\": "\\\\",
+        "\"": "\\\"",
+        "\u2028": "\\u2028", // lineSeparator
+        "\u2029": "\\u2029", // paragraphSeparator
+        "\u0085": "\\u0085"  // nextLine
+    };
+
+    /**
+     * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2),
+     * but augmented for a few select characters.
+     * Note that this doesn't actually wrap the input in double quotes.
+     *
+     * A NUL character that is immediately followed by a decimal digit is written as '\x00'
+     * instead of '\0', because '\0' followed by a digit would be read back as an octal escape
+     * sequence rather than as NUL plus that digit.
+     */
+    export function escapeString(s: string): string {
+        // Prioritize '"' and '\'
+        s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s;
+        s = escapedCharsRegExp.test(s) ? s.replace(escapedCharsRegExp, getReplacement) : s;
+
+        return s;
+
+        // Invoked by 'replace' with the matched character, its offset, and the string being searched.
+        function getReplacement(c: string, offset: number, input: string) {
+            if (c === "\0") {
+                // 'charAt' yields "" past the end of the string, which fails both comparisons.
+                var following = input.charAt(offset + 1);
+                return following >= "0" && following <= "9" ? "\\x00" : "\\0";
+            }
+
+            return escapedCharsMap[c] || get16BitUnicodeEscapeSequence(c.charCodeAt(0));
+        }
+    }
+
+    // Formats a character code as a '\uNNNN' escape using uppercase hex digits.
+    // The hex text is left-padded with zeros and only its last four characters are kept.
+    function get16BitUnicodeEscapeSequence(charCode: number): string {
+        var padded = "0000" + charCode.toString(16).toUpperCase();
+        var lastFourDigits = padded.substring(padded.length - 4);
+        return "\\u" + lastFourDigits;
+    }
+
+    var nonAsciiCharacters = /[^\u0000-\u007F]/g;
+    /**
+     * Rewrites every character outside U+0000-U+007F as a '\uNNNN' escape of its UTF-16
+     * code unit. A string containing no such character is returned unchanged.
+     */
+    export function replaceNonAsciiCharacters(s: string): string {
+        if (!nonAsciiCharacters.test(s)) {
+            return s;
+        }
+
+        return s.replace(nonAsciiCharacters, escapeCodeUnit);
+
+        function escapeCodeUnit(c: string) {
+            return get16BitUnicodeEscapeSequence(c.charCodeAt(0));
+        }
+    }
 }
diff --git a/tests/baselines/reference/APISample_compile.js b/tests/baselines/reference/APISample_compile.js
@@ -552,6 +552,7 @@ declare module "typescript" {
     interface LiteralExpression extends PrimaryExpression {
         text: string;
         isUnterminated?: boolean;
+        hasExtendedUnicodeEscape?: boolean;
     }
     interface StringLiteralExpression extends LiteralExpression {
         _stringLiteralExpressionBrand: any;
@@ -1420,6 +1421,7 @@ declare module "typescript" {
         getTokenPos(): number;
         getTokenText(): string;
         getTokenValue(): string;
+        hasExtendedUnicodeEscape(): boolean;
         hasPrecedingLineBreak(): boolean;
         isIdentifier(): boolean;
         isReservedWord(): boolean;

diff --git a/tests/baselines/reference/APISample_compile.types b/tests/baselines/reference/APISample_compile.types
@@ -1664,6 +1664,9 @@ declare module "typescript" {
 
         isUnterminated?: boolean;
 >isUnterminated : boolean
+
+        hasExtendedUnicodeEscape?: boolean;
+>hasExtendedUnicodeEscape : boolean
     }
     interface StringLiteralExpression extends LiteralExpression {
 >StringLiteralExpression : StringLiteralExpression
@@ -4477,6 +4480,9 @@ declare module "typescript" {
         getTokenValue(): string;
 >getTokenValue : () => string
 
+        hasExtendedUnicodeEscape(): boolean;
+>hasExtendedUnicodeEscape : () => boolean
+
         hasPrecedingLineBreak(): boolean;
 >hasPrecedingLineBreak : () => boolean
 

diff --git a/tests/baselines/reference/APISample_linter.js b/tests/baselines/reference/APISample_linter.js
@@ -583,6 +583,7 @@ declare module "typescript" {
     interface LiteralExpression extends PrimaryExpression {
         text: string;
         isUnterminated?: boolean;
+        hasExtendedUnicodeEscape?: boolean;
     }
     interface StringLiteralExpression extends LiteralExpression {
         _stringLiteralExpressionBrand: any;
@@ -1451,6 +1452,7 @@ declare module "typescript" {
         getTokenPos(): number;
         getTokenText(): string;
         getTokenValue(): string;
+        hasExtendedUnicodeEscape(): boolean;
         hasPrecedingLineBreak(): boolean;
         isIdentifier(): boolean;
         isReservedWord(): boolean;