diff --git a/news/2 Fixes/1779.md b/news/2 Fixes/1779.md new file mode 100644 index 000000000000..066f595d007c --- /dev/null +++ b/news/2 Fixes/1779.md @@ -0,0 +1 @@ +`editor.formatOnType` no longer breaks numbers formatted with underscores. diff --git a/src/client/language/characters.ts b/src/client/language/characters.ts index e0bc20ba4131..5a4da26a7b6d 100644 --- a/src/client/language/characters.ts +++ b/src/client/language/characters.ts @@ -83,18 +83,22 @@ export function isLineBreak(ch: number): boolean { return ch === Char.CarriageReturn || ch === Char.LineFeed; } +export function isNumber(ch: number): boolean { + return ch >= Char._0 && ch <= Char._9 || ch === Char.Underscore; +} + export function isDecimal(ch: number): boolean { - return ch >= Char._0 && ch <= Char._9; + return ch >= Char._0 && ch <= Char._9 || ch === Char.Underscore; } export function isHex(ch: number): boolean { - return isDecimal(ch) || (ch >= Char.a && ch <= Char.f) || (ch >= Char.A && ch <= Char.F); + return isDecimal(ch) || (ch >= Char.a && ch <= Char.f) || (ch >= Char.A && ch <= Char.F) || ch === Char.Underscore; } export function isOctal(ch: number): boolean { - return ch >= Char._0 && ch <= Char._7; + return ch >= Char._0 && ch <= Char._7 || ch === Char.Underscore; } export function isBinary(ch: number): boolean { - return ch === Char._0 || ch === Char._1; + return ch === Char._0 || ch === Char._1 || ch === Char.Underscore; } diff --git a/src/client/language/tokenizer.ts b/src/client/language/tokenizer.ts index 2574c388aeb1..52a3599f132b 100644 --- a/src/client/language/tokenizer.ts +++ b/src/client/language/tokenizer.ts @@ -4,7 +4,7 @@ // tslint:disable-next-line:import-name import Char from 'typescript-char'; -import { isBinary, isDecimal, isHex, isIdentifierChar, isIdentifierStartChar, isOctal } from './characters'; +import { isBinary, isDecimal, isHex, isIdentifierChar, isIdentifierStartChar, isOctal, isWhiteSpace } from './characters'; import { CharacterStream } from './characterStream'; import { TextRangeCollection } from './textRangeCollection'; import { ICharacterStream, ITextRangeCollection, IToken, ITokenizer, TextRange, TokenizerMode, TokenType } from './types'; @@ -29,13 +29,8 @@ class Token extends TextRange implements IToken { export class Tokenizer implements ITokenizer { private cs: ICharacterStream = new CharacterStream(''); private tokens: IToken[] = []; - private floatRegex = /[-+]?(?:(?:\d*\.\d+)|(?:\d+\.?))(?:[Ee][+-]?\d+)?/; private mode = TokenizerMode.Full; - constructor() { - //this.floatRegex.compile(); - } - public tokenize(text: string): ITextRangeCollection; public tokenize(text: string, start: number, length: number, mode: TokenizerMode): ITextRangeCollection; @@ -224,43 +219,74 @@ export class Tokenizer implements ITokenizer { if (this.cs.currentChar === Char._0) { let radix = 0; - // Try hex - if (this.cs.nextChar === Char.x || this.cs.nextChar === Char.X) { + // Try hex => hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ + if ((this.cs.nextChar === Char.x || this.cs.nextChar === Char.X) && isHex(this.cs.lookAhead(2))) { this.cs.advance(2); while (isHex(this.cs.currentChar)) { this.cs.moveNext(); } radix = 16; } - // Try binary - if (this.cs.nextChar === Char.b || this.cs.nextChar === Char.B) { + // Try binary => bininteger: "0" ("b" | "B") (["_"] bindigit)+ + if ((this.cs.nextChar === Char.b || this.cs.nextChar === Char.B) && isBinary(this.cs.lookAhead(2))) { this.cs.advance(2); while (isBinary(this.cs.currentChar)) { this.cs.moveNext(); } radix = 2; } - // Try octal - if (this.cs.nextChar === Char.o || this.cs.nextChar === Char.O) { + // Try octal => octinteger: "0" ("o" | "O") (["_"] octdigit)+ + if ((this.cs.nextChar === Char.o || this.cs.nextChar === Char.O) && isOctal(this.cs.lookAhead(2))) { this.cs.advance(2); while (isOctal(this.cs.currentChar)) { this.cs.moveNext(); } radix = 8; } + if (radix > 0) { + const text = this.cs.getText().substr(start + leadingSign, this.cs.position - start - leadingSign); + if (!isNaN(parseInt(text, radix))) { + this.tokens.push(new Token(TokenType.Number, start, text.length + leadingSign)); + return true; + } + } + } + + let decimal = false; + // Try decimal int => + // decinteger: nonzerodigit (["_"] digit)* | "0" (["_"] "0")* + // nonzerodigit: "1"..."9" + // digit: "0"..."9" + if (this.cs.currentChar >= Char._1 && this.cs.currentChar <= Char._9) { + while (isDecimal(this.cs.currentChar)) { + this.cs.moveNext(); + } + decimal = this.cs.currentChar !== Char.Period && this.cs.currentChar !== Char.e && this.cs.currentChar !== Char.E; + } + + if (this.cs.currentChar === Char._0) { // "0" (["_"] "0")* + while (this.cs.currentChar === Char._0 || this.cs.currentChar === Char.Underscore) { + this.cs.moveNext(); + } + decimal = this.cs.currentChar !== Char.Period && this.cs.currentChar !== Char.e && this.cs.currentChar !== Char.E; + } + + if (decimal) { const text = this.cs.getText().substr(start + leadingSign, this.cs.position - start - leadingSign); - if (radix > 0 && parseInt(text.substr(2), radix)) { + if (!isNaN(parseInt(text, 10))) { this.tokens.push(new Token(TokenType.Number, start, text.length + leadingSign)); return true; } } - if (isDecimal(this.cs.currentChar) || this.cs.currentChar === Char.Period) { - const candidate = this.cs.getText().substr(this.cs.position); - const re = this.floatRegex.exec(candidate); - if (re && re.length > 0 && re[0] && candidate.startsWith(re[0])) { - this.tokens.push(new Token(TokenType.Number, start, re[0].length + leadingSign)); - this.cs.position = start + re[0].length + leadingSign; + // Floating point + if ((this.cs.currentChar >= Char._0 && this.cs.currentChar <= Char._9) || this.cs.currentChar === Char.Period) { + while (!isWhiteSpace(this.cs.currentChar)) { + this.cs.moveNext(); + } + const text = this.cs.getText().substr(start, this.cs.position - start); + if (!isNaN(parseFloat(text))) { + this.tokens.push(new Token(TokenType.Number, start, this.cs.position - start)); return true; } } @@ -380,7 +406,7 @@ export class Tokenizer implements ITokenizer { case 'rf': case 'ur': case 'br': - return 2; + return 2; default: break; } diff --git a/src/test/language/tokenizer.test.ts b/src/test/language/tokenizer.test.ts index 397a1f9f398d..7713b019ab0b 100644 --- a/src/test/language/tokenizer.test.ts +++ b/src/test/language/tokenizer.test.ts @@ -193,7 +193,7 @@ suite('Language.Tokenizer', () => { test('Hex number', () => { const t = new Tokenizer(); const tokens = t.tokenize('1 0X2 0x3 0x'); - assert.equal(tokens.count, 4); + assert.equal(tokens.count, 5); assert.equal(tokens.getItemAt(0).type, TokenType.Number); assert.equal(tokens.getItemAt(0).length, 1); @@ -204,13 +204,16 @@ suite('Language.Tokenizer', () => { assert.equal(tokens.getItemAt(2).type, TokenType.Number); assert.equal(tokens.getItemAt(2).length, 3); - assert.equal(tokens.getItemAt(3).type, TokenType.Unknown); - assert.equal(tokens.getItemAt(3).length, 2); + assert.equal(tokens.getItemAt(3).type, TokenType.Number); + assert.equal(tokens.getItemAt(3).length, 1); + + assert.equal(tokens.getItemAt(4).type, TokenType.Identifier); + assert.equal(tokens.getItemAt(4).length, 1); }); test('Binary number', () => { const t = new Tokenizer(); const tokens = t.tokenize('1 0B1 0b010 0b3 0b'); - assert.equal(tokens.count, 6); + assert.equal(tokens.count, 7); assert.equal(tokens.getItemAt(0).type, TokenType.Number); assert.equal(tokens.getItemAt(0).length, 1); @@ -227,13 +230,16 @@ suite('Language.Tokenizer', () => { assert.equal(tokens.getItemAt(4).type, TokenType.Identifier); assert.equal(tokens.getItemAt(4).length, 2); - assert.equal(tokens.getItemAt(5).type, TokenType.Unknown); - assert.equal(tokens.getItemAt(5).length, 2); + assert.equal(tokens.getItemAt(5).type, TokenType.Number); + assert.equal(tokens.getItemAt(5).length, 1); + + assert.equal(tokens.getItemAt(6).type, TokenType.Identifier); + assert.equal(tokens.getItemAt(6).length, 1); }); test('Octal number', () => { const t = new Tokenizer(); const tokens = t.tokenize('1 0o4 0o077 -0o200 0o9 0oO'); - assert.equal(tokens.count, 7); + assert.equal(tokens.count, 8); assert.equal(tokens.getItemAt(0).type, TokenType.Number); assert.equal(tokens.getItemAt(0).length, 1); @@ -253,8 +259,11 @@ suite('Language.Tokenizer', () => { assert.equal(tokens.getItemAt(5).type, TokenType.Identifier); assert.equal(tokens.getItemAt(5).length, 2); - assert.equal(tokens.getItemAt(6).type, TokenType.Unknown); - assert.equal(tokens.getItemAt(6).length, 3); + assert.equal(tokens.getItemAt(6).type, TokenType.Number); + assert.equal(tokens.getItemAt(6).length, 1); + + assert.equal(tokens.getItemAt(7).type, TokenType.Identifier); + assert.equal(tokens.getItemAt(7).length, 2); }); test('Decimal number', () => { const t = new Tokenizer(); @@ -301,6 +310,17 @@ suite('Language.Tokenizer', () => { assert.equal(tokens.getItemAt(5).type, TokenType.Number); assert.equal(tokens.getItemAt(5).length, 5); }); + test('Underscore numbers', () => { + const t = new Tokenizer(); + const tokens = t.tokenize('+1_0_0_0 0_0 .5_00_3e-4 0xCAFE_F00D 10_000_000.0 0b_0011_1111_0100_1110'); + const lengths = [8, 3, 10, 11, 12, 22]; + assert.equal(tokens.count, 6); + + for (let i = 0; i < tokens.count; i += 1) { + assert.equal(tokens.getItemAt(i).type, TokenType.Number); + assert.equal(tokens.getItemAt(i).length, lengths[i]); + } + }); test('Simple expression, leading minus', () => { const t = new Tokenizer(); const tokens = t.tokenize('x == -y');