diff --git a/CHANGELOG.md b/CHANGELOG.md index b4ed6efb01e..9410c779151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## Unreleased + +* Fix U+30FB and U+FF65 in identifier names in ES5 vs. ES6+ ([#1599](https://github.com/evanw/esbuild/issues/1599)) + + The ES6 specification caused two code points that were previously valid in identifier names in ES5 to no longer be valid in identifier names in ES6+. The two code points are: + + * `U+30FB` i.e. `KATAKANA MIDDLE DOT` i.e. `・` + * `U+FF65` i.e. `HALFWIDTH KATAKANA MIDDLE DOT` i.e. `･` + + This means that using ES6+ parsing rules will fail to parse some valid ES5 code, and generating valid ES5 code may fail to be parsed using ES6+ parsing rules. For example, esbuild would previously fail to parse `x.y・` even though it's valid ES5 code (since it's not valid ES6+ code) and esbuild could generate `{y・:x}` when minifying even though it's not valid ES6+ code (since it's valid ES5 code). This problem is the result of my incorrect assumption that ES6 is a superset of ES5. + + As of this release, esbuild will now parse a superset of ES5 and ES6+ and will now quote an identifier name when possible unless it is considered to be a valid identifier name in both ES5 and ES6+. In other words, a union of ES5 and ES6 rules is used for parsing and the intersection of ES5 and ES6 rules is used for printing. 
+ ## 0.12.27 * Update JavaScript syntax feature compatibility tables ([#1594](https://github.com/evanw/esbuild/issues/1594)) diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go index ed161e26aab..27aa6a4e83c 100644 --- a/internal/js_lexer/js_lexer.go +++ b/internal/js_lexer/js_lexer.go @@ -577,17 +577,17 @@ func IsIdentifier(text string) bool { return true } -func IsIdentifierES5(text string) bool { +func IsIdentifierES5AndESNext(text string) bool { if len(text) == 0 { return false } for i, codePoint := range text { if i == 0 { - if !IsIdentifierStartES5(codePoint) { + if !IsIdentifierStartES5AndESNext(codePoint) { return false } } else { - if !IsIdentifierContinueES5(codePoint) { + if !IsIdentifierContinueES5AndESNext(codePoint) { return false } } @@ -652,8 +652,8 @@ func IsIdentifierUTF16(text []uint16) bool { return true } -// This does "IsIdentifierES5(UTF16ToString(text))" without any allocations -func IsIdentifierES5UTF16(text []uint16) bool { +// This does "IsIdentifierES5AndESNext(UTF16ToString(text))" without any allocations +func IsIdentifierES5AndESNextUTF16(text []uint16) bool { n := len(text) if n == 0 { return false @@ -668,11 +668,11 @@ func IsIdentifierES5UTF16(text []uint16) bool { } } if isStart { - if !IsIdentifierStartES5(r1) { + if !IsIdentifierStartES5AndESNext(r1) { return false } } else { - if !IsIdentifierContinueES5(r1) { + if !IsIdentifierContinueES5AndESNext(r1) { return false } } @@ -695,7 +695,7 @@ func IsIdentifierStart(codePoint rune) bool { return false } - return unicode.Is(idStart, codePoint) + return unicode.Is(idStartES5OrESNext, codePoint) } func IsIdentifierContinue(codePoint rune) bool { @@ -718,10 +718,10 @@ func IsIdentifierContinue(codePoint rune) bool { return true } - return unicode.Is(idContinue, codePoint) + return unicode.Is(idContinueES5OrESNext, codePoint) } -func IsIdentifierStartES5(codePoint rune) bool { +func IsIdentifierStartES5AndESNext(codePoint rune) bool { switch codePoint { case '_', 
'$', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', @@ -736,10 +736,10 @@ func IsIdentifierStartES5(codePoint rune) bool { return false } - return unicode.Is(idStartES5, codePoint) + return unicode.Is(idStartES5AndESNext, codePoint) } -func IsIdentifierContinueES5(codePoint rune) bool { +func IsIdentifierContinueES5AndESNext(codePoint rune) bool { switch codePoint { case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', @@ -759,7 +759,7 @@ func IsIdentifierContinueES5(codePoint rune) bool { return true } - return unicode.Is(idContinueES5, codePoint) + return unicode.Is(idContinueES5AndESNext, codePoint) } // See the "White Space Code Points" table in the ECMAScript standard diff --git a/internal/js_lexer/unicode.go b/internal/js_lexer/unicode.go index 6dbb0900515..c20cd543542 100644 --- a/internal/js_lexer/unicode.go +++ b/internal/js_lexer/unicode.go @@ -3,7 +3,7 @@ package js_lexer import "unicode" -var idStartES5 = &unicode.RangeTable{ +var idStartES5AndESNext = &unicode.RangeTable{ LatinOffset: 117, R16: []unicode.Range16{ {Lo: 0x41, Hi: 0x5a, Stride: 1}, @@ -266,7 +266,7 @@ var idStartES5 = &unicode.RangeTable{ }, } -var idContinueES5 = &unicode.RangeTable{ +var idContinueES5AndESNext = &unicode.RangeTable{ LatinOffset: 128, R16: []unicode.Range16{ {Lo: 0x30, Hi: 0x39, Stride: 1}, @@ -578,7 +578,8 @@ var idContinueES5 = &unicode.RangeTable{ {Lo: 0x3041, Hi: 0x3094, Stride: 1}, {Lo: 0x3099, Hi: 0x309a, Stride: 1}, {Lo: 0x309d, Hi: 0x309e, Stride: 1}, - {Lo: 0x30a1, Hi: 0x30fe, Stride: 1}, + {Lo: 0x30a1, Hi: 0x30fa, Stride: 1}, + {Lo: 0x30fc, Hi: 0x30fe, Stride: 1}, {Lo: 0x3105, Hi: 0x312c, Stride: 1}, {Lo: 0x3131, Hi: 0x318e, Stride: 1}, {Lo: 0x31a0, Hi: 0x31b7, Stride: 1}, @@ -610,7 +611,7 @@ var idContinueES5 = &unicode.RangeTable{ {Lo: 0xff21, Hi: 0xff3a, Stride: 1}, {Lo: 0xff3f, Hi: 0xff3f, Stride: 1}, {Lo: 0xff41, Hi: 0xff5a, Stride: 1}, - {Lo: 0xff65, Hi: 0xffbe, 
Stride: 1}, + {Lo: 0xff66, Hi: 0xffbe, Stride: 1}, {Lo: 0xffc2, Hi: 0xffc7, Stride: 1}, {Lo: 0xffca, Hi: 0xffcf, Stride: 1}, {Lo: 0xffd2, Hi: 0xffd7, Stride: 1}, @@ -618,7 +619,7 @@ var idContinueES5 = &unicode.RangeTable{ }, } -var idStart = &unicode.RangeTable{ +var idStartES5OrESNext = &unicode.RangeTable{ LatinOffset: 117, R16: []unicode.Range16{ {Lo: 0x41, Hi: 0x5a, Stride: 1}, @@ -1248,7 +1249,7 @@ var idStart = &unicode.RangeTable{ }, } -var idContinue = &unicode.RangeTable{ +var idContinueES5OrESNext = &unicode.RangeTable{ LatinOffset: 129, R16: []unicode.Range16{ {Lo: 0x30, Hi: 0x39, Stride: 1}, @@ -1600,8 +1601,7 @@ var idContinue = &unicode.RangeTable{ {Lo: 0x3038, Hi: 0x303c, Stride: 1}, {Lo: 0x3041, Hi: 0x3096, Stride: 1}, {Lo: 0x3099, Hi: 0x309f, Stride: 1}, - {Lo: 0x30a1, Hi: 0x30fa, Stride: 1}, - {Lo: 0x30fc, Hi: 0x30ff, Stride: 1}, + {Lo: 0x30a1, Hi: 0x30ff, Stride: 1}, {Lo: 0x3105, Hi: 0x312f, Stride: 1}, {Lo: 0x3131, Hi: 0x318e, Stride: 1}, {Lo: 0x31a0, Hi: 0x31bf, Stride: 1}, @@ -1678,7 +1678,7 @@ var idContinue = &unicode.RangeTable{ {Lo: 0xff21, Hi: 0xff3a, Stride: 1}, {Lo: 0xff3f, Hi: 0xff3f, Stride: 1}, {Lo: 0xff41, Hi: 0xff5a, Stride: 1}, - {Lo: 0xff66, Hi: 0xffbe, Stride: 1}, + {Lo: 0xff65, Hi: 0xffbe, Stride: 1}, {Lo: 0xffc2, Hi: 0xffc7, Stride: 1}, {Lo: 0xffca, Hi: 0xffcf, Stride: 1}, {Lo: 0xffd2, Hi: 0xffd7, Stride: 1}, diff --git a/internal/js_parser/js_parser_test.go b/internal/js_parser/js_parser_test.go index d0b73518be7..5f06ace7994 100644 --- a/internal/js_parser/js_parser_test.go +++ b/internal/js_parser/js_parser_test.go @@ -574,6 +574,20 @@ func TestRegExp(t *testing.T) { `) } +func TestUnicodeIdentifierNames(t *testing.T) { + // There are two code points that are valid in identifiers in ES5 but not in ES6+: + // + // U+30FB KATAKANA MIDDLE DOT + // U+FF65 HALFWIDTH KATAKANA MIDDLE DOT + // + expectPrinted(t, "x = {x・: 0}", "x = { \"x・\": 0 };\n") + expectPrinted(t, "x = {x・: 0}", "x = { \"x・\": 0 };\n") + expectPrinted(t, "x = 
{xπ: 0}", "x = { xπ: 0 };\n") + expectPrinted(t, "x = y.x・", "x = y[\"x・\"];\n") + expectPrinted(t, "x = y.x・", "x = y[\"x・\"];\n") + expectPrinted(t, "x = y.xπ", "x = y.xπ;\n") +} + func TestIdentifierEscapes(t *testing.T) { expectPrinted(t, "var _\\u0076\\u0061\\u0072", "var _var;\n") expectParseError(t, "var \\u0076\\u0061\\u0072", ": error: Expected identifier but found \"\\\\u0076\\\\u0061\\\\u0072\"\n") diff --git a/internal/js_printer/js_printer.go b/internal/js_printer/js_printer.go index 052eb543768..c8f3dedda68 100644 --- a/internal/js_printer/js_printer.go +++ b/internal/js_printer/js_printer.go @@ -510,19 +510,19 @@ func (p *printer) printClauseAlias(alias string) { // JavaScript language target that we support. func CanEscapeIdentifier(name string, unsupportedJSFeatures compat.JSFeature, asciiOnly bool) bool { - return js_lexer.IsIdentifierES5(name) && (!asciiOnly || + return js_lexer.IsIdentifierES5AndESNext(name) && (!asciiOnly || !unsupportedJSFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePoint(name)) } func (p *printer) canPrintIdentifier(name string) bool { - return js_lexer.IsIdentifierES5(name) && (!p.options.ASCIIOnly || + return js_lexer.IsIdentifierES5AndESNext(name) && (!p.options.ASCIIOnly || !p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePoint(name)) } func (p *printer) canPrintIdentifierUTF16(name []uint16) bool { - return js_lexer.IsIdentifierES5UTF16(name) && (!p.options.ASCIIOnly || + return js_lexer.IsIdentifierES5AndESNextUTF16(name) && (!p.options.ASCIIOnly || !p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePointUTF16(name)) } diff --git a/scripts/gen-unicode-table.js b/scripts/gen-unicode-table.js index cc3ee65483a..481ae2eeab1 100644 --- a/scripts/gen-unicode-table.js +++ b/scripts/gen-unicode-table.js @@ -45,10 +45,20 @@ const idContinueES5 = idStartES5.concat( // is presumed to be the Unicode set, collection 10646. 
// // UnicodeIDStart: any Unicode code point with the Unicode property “ID_Start” -const idStart = require('@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points'); +const idStartESNext = require('@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points'); +const idStartESNextSet = new Set(idStartESNext); // UnicodeIDContinue: any Unicode code point with the Unicode property “ID_Continue” -const idContinue = require('@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points'); +const idContinueESNext = require('@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points'); +const idContinueESNextSet = new Set(idContinueESNext); + +// These identifiers are valid in both ES5 and ES6+ (i.e. an intersection of both) +const idStartES5AndESNext = idStartES5.filter(n => idStartESNextSet.has(n)); +const idContinueES5AndESNext = idContinueES5.filter(n => idContinueESNextSet.has(n)); + +// These identifiers are valid in either ES5 or ES6+ (i.e. a union of both) +const idStartES5OrESNext = [...new Set(idStartES5.concat(idStartESNext))].sort((a, b) => a - b); +const idContinueES5OrESNext = [...new Set(idContinueES5.concat(idContinueESNext))].sort((a, b) => a - b); function generateRangeTable(codePoints) { let lines = []; @@ -105,11 +115,11 @@ package js_lexer import "unicode" -var idStartES5 = ${generateRangeTable(idStartES5)} +var idStartES5AndESNext = ${generateRangeTable(idStartES5AndESNext)} -var idContinueES5 = ${generateRangeTable(idContinueES5)} +var idContinueES5AndESNext = ${generateRangeTable(idContinueES5AndESNext)} -var idStart = ${generateRangeTable(idStart)} +var idStartES5OrESNext = ${generateRangeTable(idStartES5OrESNext)} -var idContinue = ${generateRangeTable(idContinue)} +var idContinueES5OrESNext = ${generateRangeTable(idContinueES5OrESNext)} `);