From 3dd3ab41a8b469909d76c0274d185000d00cfe32 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Thu, 9 Nov 2023 01:14:49 +0900 Subject: [PATCH 1/5] improve performance --- src/tokenizer/code-point-iterator.ts | 36 +-- src/tokenizer/code-point.ts | 197 ++++++------ src/tokenizer/tokenizer.ts | 436 +++++++++++++-------------- 3 files changed, 320 insertions(+), 349 deletions(-) diff --git a/src/tokenizer/code-point-iterator.ts b/src/tokenizer/code-point-iterator.ts index 8b365a2..9367522 100644 --- a/src/tokenizer/code-point-iterator.ts +++ b/src/tokenizer/code-point-iterator.ts @@ -1,4 +1,4 @@ -import { NULL, EOF, LINE_FEED, CARRIAGE_RETURN } from "./code-point"; +import { CodePoint } from "./code-point"; type Position = { offset: number; @@ -9,7 +9,7 @@ type Position = { export class CodePointIterator { public readonly text: string; - private lastCodePoint: number = NULL; + private lastCodePoint: number = CodePoint.NULL; public start: Position = { offset: -1, @@ -31,31 +31,31 @@ export class CodePointIterator { } public next(): number { - if (this.lastCodePoint === EOF) { - return EOF; + if (this.lastCodePoint === CodePoint.EOF) { + return CodePoint.EOF; } this.start.offset = this.end.offset; this.start.line = this.end.line; this.start.column = this.end.column; - const cp = this.text.codePointAt(this.start.offset) ?? EOF; - if (cp === EOF) { + const cp = this.text.codePointAt(this.start.offset) ?? CodePoint.EOF; + if (cp === CodePoint.EOF) { this.end = this.start; return (this.lastCodePoint = cp); } const shift = cp >= 0x10000 ? 2 : 1; this.end.offset = this.start.offset + shift; - if (cp === LINE_FEED) { + if (cp === CodePoint.LINE_FEED) { this.end.line = this.start.line + 1; this.end.column = 0; - } else if (cp === CARRIAGE_RETURN) { - if (this.text.codePointAt(this.end.offset) === LINE_FEED) { + } else if (cp === CodePoint.CARRIAGE_RETURN) { + if (this.text.codePointAt(this.end.offset) === CodePoint.LINE_FEED) { this.end.offset++; this.end.line = this.start.line + 1; this.end.column = 0; } - return (this.lastCodePoint = LINE_FEED); + return (this.lastCodePoint = CodePoint.LINE_FEED); } else { this.end.column = this.start.column + shift; } @@ -66,15 +66,15 @@ export class CodePointIterator { public *iterateSubCodePoints(): IterableIterator { let index = this.end.offset; while (true) { - let cp = this.text.codePointAt(index) ?? EOF; - if (cp === CARRIAGE_RETURN) { - if (this.text.codePointAt(index) === LINE_FEED) { - cp = this.text.codePointAt(++index) ?? EOF; + let cp = this.text.codePointAt(index) ?? CodePoint.EOF; + if (cp === CodePoint.CARRIAGE_RETURN) { + if (this.text.codePointAt(index) === CodePoint.LINE_FEED) { + cp = this.text.codePointAt(++index) ?? CodePoint.EOF; } else { - cp = LINE_FEED; + cp = CodePoint.LINE_FEED; } } - if (cp === EOF) { + if (cp === CodePoint.EOF) { return; } yield cp; @@ -92,12 +92,12 @@ export class CodePointIterator { return { next() { if (end) { - return EOF; + return CodePoint.EOF; } const r = sub.next(); if (r.done) { end = true; - return EOF; + return CodePoint.EOF; } count++; return r.value; diff --git a/src/tokenizer/code-point.ts b/src/tokenizer/code-point.ts index 31d020d..6a4c1a2 100644 --- a/src/tokenizer/code-point.ts +++ b/src/tokenizer/code-point.ts @@ -1,125 +1,128 @@ -export const EOF = -1; -export const NULL = 0x00; -export const SOH = 0x01; -export const BACKSPACE = 0x08; -export const TABULATION = 0x09; -export const LINE_FEED = 0x0a; -export const FORM_FEED = 0x0c; -export const CARRIAGE_RETURN = 0x0d; -export const ESCAPE = 0x1b; -export const SO = 0x0e; -export const US = 0x1f; -export const SPACE = 0x20; -export const QUOTATION_MARK = 0x22; -export const HASH = 0x23; -export const SINGLE_QUOTE = 0x27; -export const PLUS_SIGN = 0x2b; -export const COMMA = 0x2c; -export const DASH = 0x2d; -export const DOT = 0x2e; -export const DIGIT_0 = 0x30; -export const DIGIT_1 = 0x31; -export const DIGIT_2 = 0x32; -export const DIGIT_3 = 0x33; -export const DIGIT_7 = 0x37; -export const DIGIT_9 = 0x39; -export const COLON = 0x3a; -export const EQUALS_SIGN = 0x3d; -export const LATIN_CAPITAL_A = 0x41; -export const LATIN_CAPITAL_E = 0x45; -export const LATIN_CAPITAL_F = 0x46; -export const LATIN_CAPITAL_T = 0x54; -export const LATIN_CAPITAL_U = 0x55; -export const LATIN_CAPITAL_Z = 0x5a; -export const LEFT_BRACKET = 0x5b; // [ -export const BACKSLASH = 0x5c; -export const RIGHT_BRACKET = 0x5d; // ] -export const UNDERSCORE = 0x5f; -export const LATIN_SMALL_A = 0x61; -export const LATIN_SMALL_B = 0x62; -export const LATIN_SMALL_E = 0x65; -export const LATIN_SMALL_F = 0x66; -export const LATIN_SMALL_I = 0x69; -export const LATIN_SMALL_L = 0x6c; -export const LATIN_SMALL_N = 0x6e; -export const LATIN_SMALL_O = 0x6f; -export const LATIN_SMALL_R = 0x72; -export const LATIN_SMALL_S = 0x73; -export const LATIN_SMALL_T = 0x74; -export const LATIN_SMALL_U = 0x75; -export const LATIN_SMALL_X = 0x78; -export const LATIN_SMALL_Z = 0x7a; -export const LEFT_BRACE = 0x7b; // { -export const RIGHT_BRACE = 0x7d; // } -export const DELETE = 0x7f; -export const PAD = 0x80; -export const SUPERSCRIPT_TWO = 0xb2; -export const SUPERSCRIPT_THREE = 0xb3; -export const SUPERSCRIPT_ONE = 0xb9; -export const VULGAR_FRACTION_ONE_QUARTER = 0xbc; -export const VULGAR_FRACTION_THREE_QUARTERS = 0xbe; -export const LATIN_CAPITAL_LETTER_A_WITH_GRAVE = 0xc0; -export const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = 0xd6; -export const LATIN_CAPITAL_LETTER_O_WITH_STROKE = 0xd8; -export const LATIN_SMALL_LETTER_O_WITH_DIAERESIS = 0xf6; -export const LATIN_SMALL_LETTER_O_WITH_STROKE = 0xf8; -export const GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL = 0x37b; -export const GREEK_CAPITAL_LETTER_YOT = 0x37f; -export const CP_1FFF = 0x1fff; -export const ZERO_WIDTH_NON_JOINER = 0x200c; -export const ZERO_WIDTH_JOINER = 0x200d; -export const UNDERTIE = 0x203f; -export const CHARACTER_TIE = 0x2040; -export const SUPERSCRIPT_ZERO = 0x2070; -export const CP_218F = 0x218f; -export const CIRCLED_DIGIT_ONE = 0x2460; -export const NEGATIVE_CIRCLED_DIGIT_ZERO = 0x24ff; -export const GLAGOLITIC_CAPITAL_LETTER_AZU = 0x2c00; -export const CP_2FEF = 0x2fef; -export const IDEOGRAPHIC_COMMA = 0x3001; -export const CP_D7FF = 0xd7ff; -export const CP_E000 = 0xe000; -export const CJK_COMPATIBILITY_IDEOGRAPH_F900 = 0xf900; -export const ARABIC_LIGATURE_SALAAMUHU_ALAYNAA = 0xfdcf; -export const ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM = 0xfdf0; -export const REPLACEMENT_CHARACTER = 0xfffd; -export const LINEAR_B_SYLLABLE_B008_A = 0x10000; -export const CP_EFFFF = 0xeffff; -export const CP_10FFFF = 0x10ffff; +// eslint-disable-next-line no-shadow -- bug? +export const enum CodePoint { + EOF = -1, + NULL = 0x00, + SOH = 0x01, + BACKSPACE = 0x08, + TABULATION = 0x09, + LINE_FEED = 0x0a, + FORM_FEED = 0x0c, + CARRIAGE_RETURN = 0x0d, + ESCAPE = 0x1b, + SO = 0x0e, + US = 0x1f, + SPACE = 0x20, + QUOTATION_MARK = 0x22, + HASH = 0x23, + SINGLE_QUOTE = 0x27, + PLUS_SIGN = 0x2b, + COMMA = 0x2c, + DASH = 0x2d, + DOT = 0x2e, + DIGIT_0 = 0x30, + DIGIT_1 = 0x31, + DIGIT_2 = 0x32, + DIGIT_3 = 0x33, + DIGIT_7 = 0x37, + DIGIT_9 = 0x39, + COLON = 0x3a, + EQUALS_SIGN = 0x3d, + LATIN_CAPITAL_A = 0x41, + LATIN_CAPITAL_E = 0x45, + LATIN_CAPITAL_F = 0x46, + LATIN_CAPITAL_T = 0x54, + LATIN_CAPITAL_U = 0x55, + LATIN_CAPITAL_Z = 0x5a, + LEFT_BRACKET = 0x5b, // [ + BACKSLASH = 0x5c, + RIGHT_BRACKET = 0x5d, // ] + UNDERSCORE = 0x5f, + LATIN_SMALL_A = 0x61, + LATIN_SMALL_B = 0x62, + LATIN_SMALL_E = 0x65, + LATIN_SMALL_F = 0x66, + LATIN_SMALL_I = 0x69, + LATIN_SMALL_L = 0x6c, + LATIN_SMALL_N = 0x6e, + LATIN_SMALL_O = 0x6f, + LATIN_SMALL_R = 0x72, + LATIN_SMALL_S = 0x73, + LATIN_SMALL_T = 0x74, + LATIN_SMALL_U = 0x75, + LATIN_SMALL_X = 0x78, + LATIN_SMALL_Z = 0x7a, + LEFT_BRACE = 0x7b, // { + RIGHT_BRACE = 0x7d, // } + DELETE = 0x7f, + PAD = 0x80, + SUPERSCRIPT_TWO = 0xb2, + SUPERSCRIPT_THREE = 0xb3, + SUPERSCRIPT_ONE = 0xb9, + VULGAR_FRACTION_ONE_QUARTER = 0xbc, + VULGAR_FRACTION_THREE_QUARTERS = 0xbe, + LATIN_CAPITAL_LETTER_A_WITH_GRAVE = 0xc0, + LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = 0xd6, + LATIN_CAPITAL_LETTER_O_WITH_STROKE = 0xd8, + LATIN_SMALL_LETTER_O_WITH_DIAERESIS = 0xf6, + LATIN_SMALL_LETTER_O_WITH_STROKE = 0xf8, + GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL = 0x37b, + GREEK_CAPITAL_LETTER_YOT = 0x37f, + CP_1FFF = 0x1fff, + ZERO_WIDTH_NON_JOINER = 0x200c, + ZERO_WIDTH_JOINER = 0x200d, + UNDERTIE = 0x203f, + CHARACTER_TIE = 0x2040, + SUPERSCRIPT_ZERO = 0x2070, + CP_218F = 0x218f, + CIRCLED_DIGIT_ONE = 0x2460, + NEGATIVE_CIRCLED_DIGIT_ZERO = 0x24ff, + GLAGOLITIC_CAPITAL_LETTER_AZU = 0x2c00, + CP_2FEF = 0x2fef, + IDEOGRAPHIC_COMMA = 0x3001, + CP_D7FF = 0xd7ff, + CP_E000 = 0xe000, + CJK_COMPATIBILITY_IDEOGRAPH_F900 = 0xf900, + ARABIC_LIGATURE_SALAAMUHU_ALAYNAA = 0xfdcf, + ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM = 0xfdf0, + REPLACEMENT_CHARACTER = 0xfffd, + LINEAR_B_SYLLABLE_B008_A = 0x10000, + CP_EFFFF = 0xeffff, + CP_10FFFF = 0x10ffff, +} /** * Check whether the code point is a control character. */ export function isControl(cp: number): boolean { - return cp >= NULL && cp <= US; + return cp >= CodePoint.NULL && cp <= CodePoint.US; } /** * Check whether the code point is a whitespace. */ export function isWhitespace(cp: number): boolean { - return cp === TABULATION || cp === SPACE; + return cp === CodePoint.TABULATION || cp === CodePoint.SPACE; } /** * Check whether the code point is a end of line. */ export function isEOL(cp: number): boolean { - return cp === LINE_FEED || cp === CARRIAGE_RETURN; + return cp === CodePoint.LINE_FEED || cp === CodePoint.CARRIAGE_RETURN; } /** * Check whether the code point is an uppercase letter character. */ function isUpperLetter(cp: number): boolean { - return cp >= LATIN_CAPITAL_A && cp <= LATIN_CAPITAL_Z; + return cp >= CodePoint.LATIN_CAPITAL_A && cp <= CodePoint.LATIN_CAPITAL_Z; } /** * Check whether the code point is a lowercase letter character. */ function isLowerLetter(cp: number): boolean { - return cp >= LATIN_SMALL_A && cp <= LATIN_SMALL_Z; + return cp >= CodePoint.LATIN_SMALL_A && cp <= CodePoint.LATIN_SMALL_Z; } /** @@ -133,7 +136,7 @@ export function isLetter(cp: number): boolean { * Check whether the code point is a digit character. */ export function isDigit(cp: number): boolean { - return cp >= DIGIT_0 && cp <= DIGIT_9; + return cp >= CodePoint.DIGIT_0 && cp <= CodePoint.DIGIT_9; } /** @@ -142,15 +145,15 @@ export function isDigit(cp: number): boolean { export function isHexDig(cp: number): boolean { return ( isDigit(cp) || - (cp >= LATIN_SMALL_A && cp <= LATIN_SMALL_F) || - (cp >= LATIN_CAPITAL_A && cp <= LATIN_CAPITAL_F) + (cp >= CodePoint.LATIN_SMALL_A && cp <= CodePoint.LATIN_SMALL_F) || + (cp >= CodePoint.LATIN_CAPITAL_A && cp <= CodePoint.LATIN_CAPITAL_F) ); } /** * Check whether the code point is a octal digit character. */ export function isOctalDig(cp: number): boolean { - return cp >= DIGIT_0 && cp <= DIGIT_7; + return cp >= CodePoint.DIGIT_0 && cp <= CodePoint.DIGIT_7; } /** diff --git a/src/tokenizer/tokenizer.ts b/src/tokenizer/tokenizer.ts index 60a387c..79f4dc5 100644 --- a/src/tokenizer/tokenizer.ts +++ b/src/tokenizer/tokenizer.ts @@ -19,95 +19,15 @@ import type { TOMLVer } from "../parser-options"; import { getTOMLVer, type ParserOptions } from "../parser-options"; import { CodePointIterator } from "./code-point-iterator"; import { - EOF, - LINE_FEED, - NULL, + CodePoint, isWhitespace, isEOL, - EQUALS_SIGN, - QUOTATION_MARK, - LATIN_SMALL_B, - BACKSLASH, - LATIN_SMALL_T, - LATIN_SMALL_N, - LATIN_SMALL_F, - LATIN_SMALL_R, - BACKSPACE, - TABULATION, - FORM_FEED, - CARRIAGE_RETURN, - LATIN_SMALL_U, - LATIN_CAPITAL_U, isHexDig, isLetter, isDigit, - UNDERSCORE, - DASH, isControl, - DELETE, - HASH, - DOT, - SINGLE_QUOTE, - LATIN_SMALL_A, - LATIN_SMALL_I, - PLUS_SIGN, - DIGIT_0, - LATIN_SMALL_O, - LATIN_SMALL_X, - LATIN_SMALL_E, - LATIN_CAPITAL_E, - LATIN_SMALL_S, - LATIN_SMALL_L, - LEFT_BRACKET, - RIGHT_BRACKET, - LEFT_BRACE, - RIGHT_BRACE, - COMMA, isOctalDig, - DIGIT_1, - LATIN_CAPITAL_T, - SPACE, - COLON, - LATIN_CAPITAL_Z, - LATIN_SMALL_Z, isUnicodeScalarValue, - ESCAPE, - SUPERSCRIPT_TWO, - SUPERSCRIPT_THREE, - SUPERSCRIPT_ONE, - VULGAR_FRACTION_ONE_QUARTER, - VULGAR_FRACTION_THREE_QUARTERS, - LATIN_CAPITAL_LETTER_A_WITH_GRAVE, - LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, - LATIN_SMALL_LETTER_O_WITH_DIAERESIS, - LATIN_CAPITAL_LETTER_O_WITH_STROKE, - GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL, - LATIN_SMALL_LETTER_O_WITH_STROKE, - GREEK_CAPITAL_LETTER_YOT, - CP_1FFF, - ZERO_WIDTH_NON_JOINER, - ZERO_WIDTH_JOINER, - UNDERTIE, - CHARACTER_TIE, - SUPERSCRIPT_ZERO, - CP_218F, - CIRCLED_DIGIT_ONE, - NEGATIVE_CIRCLED_DIGIT_ZERO, - GLAGOLITIC_CAPITAL_LETTER_AZU, - CP_2FEF, - IDEOGRAPHIC_COMMA, - CP_D7FF, - CJK_COMPATIBILITY_IDEOGRAPH_F900, - ARABIC_LIGATURE_SALAAMUHU_ALAYNAA, - ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM, - REPLACEMENT_CHARACTER, - LINEAR_B_SYLLABLE_B008_A, - CP_EFFFF, - SOH, - SO, - CP_10FFFF, - CP_E000, - PAD, } from "./code-point"; type Position = { @@ -151,26 +71,26 @@ const RADIX_PREFIXES = { const ESCAPES_1_0: Record = { // escape-seq-char = %x22 ; " quotation mark U+0022 - [QUOTATION_MARK]: String.fromCodePoint(QUOTATION_MARK), + [CodePoint.QUOTATION_MARK]: String.fromCodePoint(CodePoint.QUOTATION_MARK), // escape-seq-char =/ %x5C ; \ reverse solidus U+005C - [BACKSLASH]: String.fromCodePoint(BACKSLASH), + [CodePoint.BACKSLASH]: String.fromCodePoint(CodePoint.BACKSLASH), // escape-seq-char =/ %x62 ; b backspace U+0008 - [LATIN_SMALL_B]: String.fromCodePoint(BACKSPACE), + [CodePoint.LATIN_SMALL_B]: String.fromCodePoint(CodePoint.BACKSPACE), // escape-seq-char =/ %x66 ; f form feed U+000C - [LATIN_SMALL_F]: String.fromCodePoint(FORM_FEED), + [CodePoint.LATIN_SMALL_F]: String.fromCodePoint(CodePoint.FORM_FEED), // escape-seq-char =/ %x6E ; n line feed U+000A - [LATIN_SMALL_N]: String.fromCodePoint(LINE_FEED), + [CodePoint.LATIN_SMALL_N]: String.fromCodePoint(CodePoint.LINE_FEED), // escape-seq-char =/ %x72 ; r carriage return U+000D - [LATIN_SMALL_R]: String.fromCodePoint(CARRIAGE_RETURN), + [CodePoint.LATIN_SMALL_R]: String.fromCodePoint(CodePoint.CARRIAGE_RETURN), // escape-seq-char =/ %x74 ; t tab U+0009 - [LATIN_SMALL_T]: String.fromCodePoint(TABULATION), + [CodePoint.LATIN_SMALL_T]: String.fromCodePoint(CodePoint.TABULATION), }; const ESCAPES_LATEST: Record = { ...ESCAPES_1_0, // escape-seq-char =/ %x65 ; e escape U+001B // Added in TOML 1.1 - [LATIN_SMALL_E]: String.fromCodePoint(ESCAPE), + [CodePoint.LATIN_SMALL_E]: String.fromCodePoint(CodePoint.ESCAPE), }; type ExponentData = { @@ -211,7 +131,7 @@ export class Tokenizer { private backCode = false; - private lastCodePoint: number = NULL; + private lastCodePoint: number = CodePoint.NULL; private state: TokenizerState = "DATA"; @@ -279,7 +199,7 @@ export class Tokenizer { return token; } let cp = this.lastCodePoint; - while (cp !== EOF && !this.token) { + while (cp !== CodePoint.EOF && !this.token) { cp = this.nextCode(); const nextState = this[this.state](cp); if (!nextState) { @@ -296,8 +216,8 @@ export class Tokenizer { * Get the next code point. */ private nextCode(): number { - if (this.lastCodePoint === EOF) { - return EOF; + if (this.lastCodePoint === CodePoint.EOF) { + return CodePoint.EOF; } if (this.backCode) { this.backCode = false; @@ -488,37 +408,37 @@ export class Tokenizer { while (isWhitespace(cp) || isEOL(cp)) { cp = this.nextCode(); } - if (cp === HASH) { + if (cp === CodePoint.HASH) { this.startToken(); return "COMMENT"; } - if (cp === QUOTATION_MARK) { + if (cp === CodePoint.QUOTATION_MARK) { this.startToken(); return "BASIC_STRING"; } - if (cp === SINGLE_QUOTE) { + if (cp === CodePoint.SINGLE_QUOTE) { this.startToken(); return "LITERAL_STRING"; } if ( - cp === DOT || // . - cp === EQUALS_SIGN || // = - cp === LEFT_BRACKET || // [ - cp === RIGHT_BRACKET || // ] - cp === LEFT_BRACE || // { - cp === RIGHT_BRACE || // } - cp === COMMA // , + cp === CodePoint.DOT || // . + cp === CodePoint.EQUALS_SIGN || // = + cp === CodePoint.LEFT_BRACKET || // [ + cp === CodePoint.RIGHT_BRACKET || // ] + cp === CodePoint.LEFT_BRACE || // { + cp === CodePoint.RIGHT_BRACE || // } + cp === CodePoint.COMMA // , ) { this.punctuatorToken(); return "DATA"; } if (this.valuesEnabled) { - if (cp === DASH || cp === PLUS_SIGN) { + if (cp === CodePoint.DASH || cp === CodePoint.PLUS_SIGN) { this.startToken(); return "SIGN"; } - if (cp === LATIN_SMALL_N || cp === LATIN_SMALL_I) { + if (cp === CodePoint.LATIN_SMALL_N || cp === CodePoint.LATIN_SMALL_I) { this.startToken(); return this.back("NAN_OR_INF"); } @@ -526,7 +446,7 @@ export class Tokenizer { this.startToken(); return this.back("NUMBER"); } - if (cp === LATIN_SMALL_T || cp === LATIN_SMALL_F) { + if (cp === CodePoint.LATIN_SMALL_T || cp === CodePoint.LATIN_SMALL_F) { this.startToken(); return this.back("BOOLEAN"); } @@ -537,7 +457,7 @@ export class Tokenizer { } } - if (cp === EOF) { + if (cp === CodePoint.EOF) { // end return "DATA"; } @@ -557,7 +477,7 @@ export class Tokenizer { this.reportParseErrorControlChar(); } }; - while (!isEOL(cp) && cp !== EOF) { + while (!isEOL(cp) && cp !== CodePoint.EOF) { processCommentChar(cp); cp = this.nextCode(); } @@ -574,39 +494,46 @@ export class Tokenizer { } private BASIC_STRING(cp: number): TokenizerState { - if (cp === QUOTATION_MARK) { + if (cp === CodePoint.QUOTATION_MARK) { cp = this.nextCode(); - if (cp === QUOTATION_MARK) { + if (cp === CodePoint.QUOTATION_MARK) { return "MULTI_LINE_BASIC_STRING"; } this.endToken("BasicString", "start", ""); return this.back("DATA"); } let out = ""; - while (cp !== QUOTATION_MARK && cp !== EOF && cp !== LINE_FEED) { + while ( + cp !== CodePoint.QUOTATION_MARK && + cp !== CodePoint.EOF && + cp !== CodePoint.LINE_FEED + ) { if (isControlOtherThanTab(cp)) { return this.reportParseErrorControlChar(); } - if (cp === BACKSLASH) { + if (cp === CodePoint.BACKSLASH) { cp = this.nextCode(); const ecp = this.ESCAPES[cp]; if (ecp) { out += ecp; cp = this.nextCode(); continue; - } else if (cp === LATIN_SMALL_U) { + } else if (cp === CodePoint.LATIN_SMALL_U) { // escape-seq-char =/ %x75 4HEXDIG ; uHHHH U+HHHH const code = this.parseUnicode(4); out += String.fromCodePoint(code); cp = this.nextCode(); continue; - } else if (cp === LATIN_CAPITAL_U) { + } else if (cp === CodePoint.LATIN_CAPITAL_U) { // escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH U+HHHHHHHH const code = this.parseUnicode(8); out += String.fromCodePoint(code); cp = this.nextCode(); continue; - } else if (cp === LATIN_SMALL_X && this.tomlVersion.gte(1, 1)) { + } else if ( + cp === CodePoint.LATIN_SMALL_X && + this.tomlVersion.gte(1, 1) + ) { // escape-seq-char =/ %x78 2HEXDIG ; xHH U+00HH // Added in TOML 1.1 const code = this.parseUnicode(2); @@ -619,7 +546,7 @@ export class Tokenizer { out += this.currChar(cp); cp = this.nextCode(); } - if (cp !== QUOTATION_MARK) { + if (cp !== CodePoint.QUOTATION_MARK) { return this.reportParseError("unterminated-string"); } this.endToken("BasicString", "end", out); @@ -628,25 +555,25 @@ export class Tokenizer { private MULTI_LINE_BASIC_STRING(cp: number): TokenizerState { let out = ""; - if (cp === LINE_FEED) { + if (cp === CodePoint.LINE_FEED) { // A newline immediately following the opening delimiter will be trimmed. cp = this.nextCode(); } - while (cp !== EOF) { - if (cp !== LINE_FEED && isControlOtherThanTab(cp)) { + while (cp !== CodePoint.EOF) { + if (cp !== CodePoint.LINE_FEED && isControlOtherThanTab(cp)) { return this.reportParseErrorControlChar(); } - if (cp === QUOTATION_MARK) { + if (cp === CodePoint.QUOTATION_MARK) { const nextPoints = this.codePointIterator.subCodePoints(); if ( - nextPoints.next() === QUOTATION_MARK && - nextPoints.next() === QUOTATION_MARK + nextPoints.next() === CodePoint.QUOTATION_MARK && + nextPoints.next() === CodePoint.QUOTATION_MARK ) { - if (nextPoints.next() === QUOTATION_MARK) { + if (nextPoints.next() === CodePoint.QUOTATION_MARK) { out += '"'; - if (nextPoints.next() === QUOTATION_MARK) { + if (nextPoints.next() === CodePoint.QUOTATION_MARK) { out += '"'; - if (nextPoints.next() === QUOTATION_MARK) { + if (nextPoints.next() === CodePoint.QUOTATION_MARK) { return this.reportParseError("invalid-three-quotes"); } } @@ -657,42 +584,45 @@ export class Tokenizer { return "DATA"; } } - if (cp === BACKSLASH) { + if (cp === CodePoint.BACKSLASH) { cp = this.nextCode(); const ecp = this.ESCAPES[cp]; if (ecp) { out += ecp; cp = this.nextCode(); continue; - } else if (cp === LATIN_SMALL_U) { + } else if (cp === CodePoint.LATIN_SMALL_U) { // escape-seq-char =/ %x75 4HEXDIG ; uHHHH U+HHHH const code = this.parseUnicode(4); out += String.fromCodePoint(code); cp = this.nextCode(); continue; - } else if (cp === LATIN_CAPITAL_U) { + } else if (cp === CodePoint.LATIN_CAPITAL_U) { // escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH U+HHHHHHHH const code = this.parseUnicode(8); out += String.fromCodePoint(code); cp = this.nextCode(); continue; - } else if (cp === LATIN_SMALL_X && this.tomlVersion.gte(1, 1)) { + } else if ( + cp === CodePoint.LATIN_SMALL_X && + this.tomlVersion.gte(1, 1) + ) { // escape-seq-char =/ %x78 2HEXDIG ; xHH U+00HH // Added in TOML 1.1 const code = this.parseUnicode(2); out += String.fromCodePoint(code); cp = this.nextCode(); continue; - } else if (cp === LINE_FEED) { + } else if (cp === CodePoint.LINE_FEED) { cp = this.nextCode(); - while (isWhitespace(cp) || cp === LINE_FEED) { + while (isWhitespace(cp) || cp === CodePoint.LINE_FEED) { cp = this.nextCode(); } continue; } else if (isWhitespace(cp)) { let valid = true; for (const nextCp of this.codePointIterator.iterateSubCodePoints()) { - if (nextCp === LINE_FEED) { + if (nextCp === CodePoint.LINE_FEED) { break; } if (!isWhitespace(nextCp)) { @@ -702,7 +632,7 @@ export class Tokenizer { } if (valid) { cp = this.nextCode(); - while (isWhitespace(cp) || cp === LINE_FEED) { + while (isWhitespace(cp) || cp === CodePoint.LINE_FEED) { cp = this.nextCode(); } continue; @@ -718,23 +648,27 @@ export class Tokenizer { } private LITERAL_STRING(cp: number): TokenizerState { - if (cp === SINGLE_QUOTE) { + if (cp === CodePoint.SINGLE_QUOTE) { cp = this.nextCode(); - if (cp === SINGLE_QUOTE) { + if (cp === CodePoint.SINGLE_QUOTE) { return "MULTI_LINE_LITERAL_STRING"; } this.endToken("LiteralString", "start", ""); return this.back("DATA"); } let out = ""; - while (cp !== SINGLE_QUOTE && cp !== EOF && cp !== LINE_FEED) { + while ( + cp !== CodePoint.SINGLE_QUOTE && + cp !== CodePoint.EOF && + cp !== CodePoint.LINE_FEED + ) { if (isControlOtherThanTab(cp)) { return this.reportParseErrorControlChar(); } out += this.currChar(cp); cp = this.nextCode(); } - if (cp !== SINGLE_QUOTE) { + if (cp !== CodePoint.SINGLE_QUOTE) { return this.reportParseError("unterminated-string"); } this.endToken("LiteralString", "end", out); @@ -743,25 +677,25 @@ export class Tokenizer { private MULTI_LINE_LITERAL_STRING(cp: number): TokenizerState { let out = ""; - if (cp === LINE_FEED) { + if (cp === CodePoint.LINE_FEED) { // A newline immediately following the opening delimiter will be trimmed. cp = this.nextCode(); } - while (cp !== EOF) { - if (cp !== LINE_FEED && isControlOtherThanTab(cp)) { + while (cp !== CodePoint.EOF) { + if (cp !== CodePoint.LINE_FEED && isControlOtherThanTab(cp)) { return this.reportParseErrorControlChar(); } - if (cp === SINGLE_QUOTE) { + if (cp === CodePoint.SINGLE_QUOTE) { const nextPoints = this.codePointIterator.subCodePoints(); if ( - nextPoints.next() === SINGLE_QUOTE && - nextPoints.next() === SINGLE_QUOTE + nextPoints.next() === CodePoint.SINGLE_QUOTE && + nextPoints.next() === CodePoint.SINGLE_QUOTE ) { - if (nextPoints.next() === SINGLE_QUOTE) { + if (nextPoints.next() === CodePoint.SINGLE_QUOTE) { out += "'"; - if (nextPoints.next() === SINGLE_QUOTE) { + if (nextPoints.next() === CodePoint.SINGLE_QUOTE) { out += "'"; - if (nextPoints.next() === SINGLE_QUOTE) { + if (nextPoints.next() === CodePoint.SINGLE_QUOTE) { return this.reportParseError("invalid-three-quotes"); } } @@ -779,7 +713,7 @@ export class Tokenizer { } private SIGN(cp: number): TokenizerState { - if (cp === LATIN_SMALL_N || cp === LATIN_SMALL_I) { + if (cp === CodePoint.LATIN_SMALL_N || cp === CodePoint.LATIN_SMALL_I) { return this.back("NAN_OR_INF"); } if (isDigit(cp)) { @@ -789,21 +723,21 @@ export class Tokenizer { } private NAN_OR_INF(cp: number): TokenizerState { - if (cp === LATIN_SMALL_N) { + if (cp === CodePoint.LATIN_SMALL_N) { const codePoints = this.codePointIterator.subCodePoints(); if ( - codePoints.next() === LATIN_SMALL_A && - codePoints.next() === LATIN_SMALL_N + codePoints.next() === CodePoint.LATIN_SMALL_A && + codePoints.next() === CodePoint.LATIN_SMALL_N ) { this.skip(2); this.endToken("Float", "end", NaN); return "DATA"; } - } else if (cp === LATIN_SMALL_I) { + } else if (cp === CodePoint.LATIN_SMALL_I) { const codePoints = this.codePointIterator.subCodePoints(); if ( - codePoints.next() === LATIN_SMALL_N && - codePoints.next() === LATIN_SMALL_F + codePoints.next() === CodePoint.LATIN_SMALL_N && + codePoints.next() === CodePoint.LATIN_SMALL_F ) { this.skip(2); this.endToken( @@ -819,9 +753,14 @@ export class Tokenizer { private NUMBER(cp: number): TokenizerState { const start = this.text[this.tokenStart.offset]; - const sign = start === "+" ? PLUS_SIGN : start === "-" ? DASH : NULL; - if (cp === DIGIT_0) { - if (sign === NULL) { + const sign = + start === "+" + ? CodePoint.PLUS_SIGN + : start === "-" + ? CodePoint.DASH + : CodePoint.NULL; + if (cp === CodePoint.DIGIT_0) { + if (sign === CodePoint.NULL) { const subCodePoints = this.codePointIterator.subCodePoints(); const nextCp = subCodePoints.next(); if (isDigit(nextCp)) { @@ -829,10 +768,10 @@ export class Tokenizer { if ( (isDigit(nextNextCp) && isDigit(subCodePoints.next()) && - subCodePoints.next() === DASH) || - nextNextCp === COLON + subCodePoints.next() === CodePoint.DASH) || + nextNextCp === CodePoint.COLON ) { - const isDate = nextNextCp !== COLON; + const isDate = nextNextCp !== CodePoint.COLON; const data: DateTimeData = { hasDate: isDate, year: 0, @@ -851,31 +790,31 @@ export class Tokenizer { cp = this.nextCode(); if ( - cp === LATIN_SMALL_X || - cp === LATIN_SMALL_O || - cp === LATIN_SMALL_B + cp === CodePoint.LATIN_SMALL_X || + cp === CodePoint.LATIN_SMALL_O || + cp === CodePoint.LATIN_SMALL_B ) { - if (sign !== NULL) { + if (sign !== CodePoint.NULL) { return this.reportParseError("unexpected-char"); } - return cp === LATIN_SMALL_X + return cp === CodePoint.LATIN_SMALL_X ? "HEX" - : cp === LATIN_SMALL_O + : cp === CodePoint.LATIN_SMALL_O ? "OCTAL" : "BINARY"; } - if (cp === LATIN_SMALL_E || cp === LATIN_CAPITAL_E) { + if (cp === CodePoint.LATIN_SMALL_E || cp === CodePoint.LATIN_CAPITAL_E) { const data: ExponentData = { // Float values -0.0 and +0.0 are valid and should map according to IEEE 754. - minus: sign === DASH, + minus: sign === CodePoint.DASH, left: "0", }; this.data = data; return "EXPONENT_RIGHT"; } - if (cp === DOT) { + if (cp === CodePoint.DOT) { const data: FractionalData = { - minus: sign === DASH, + minus: sign === CodePoint.DASH, absInt: "0", }; this.data = data; @@ -888,8 +827,8 @@ export class Tokenizer { const { out, nextCp, hasUnderscore } = this.parseDigits(cp, isDigit); if ( - nextCp === DASH && - sign === NULL && + nextCp === CodePoint.DASH && + sign === CodePoint.NULL && !hasUnderscore && out.length === 4 ) { @@ -906,8 +845,8 @@ export class Tokenizer { return "DATE_MONTH"; } if ( - nextCp === COLON && - sign === NULL && + nextCp === CodePoint.COLON && + sign === CodePoint.NULL && !hasUnderscore && out.length === 2 ) { @@ -924,23 +863,31 @@ export class Tokenizer { return "TIME_MINUTE"; } - if (nextCp === LATIN_SMALL_E || nextCp === LATIN_CAPITAL_E) { + if ( + nextCp === CodePoint.LATIN_SMALL_E || + nextCp === CodePoint.LATIN_CAPITAL_E + ) { const data: ExponentData = { - minus: sign === DASH, + minus: sign === CodePoint.DASH, left: out, }; this.data = data; return "EXPONENT_RIGHT"; } - if (nextCp === DOT) { + if (nextCp === CodePoint.DOT) { const data: FractionalData = { - minus: sign === DASH, + minus: sign === CodePoint.DASH, absInt: out, }; this.data = data; return "FRACTIONAL_RIGHT"; } - this.endToken("Integer", "start", sign === DASH ? `-${out}` : out, 10); + this.endToken( + "Integer", + "start", + sign === CodePoint.DASH ? `-${out}` : out, + 10, + ); return this.back("DATA"); } @@ -957,7 +904,10 @@ export class Tokenizer { } private BINARY(cp: number): TokenizerState { - const { out } = this.parseDigits(cp, (c) => c === DIGIT_0 || c === DIGIT_1); + const { out } = this.parseDigits( + cp, + (c) => c === CodePoint.DIGIT_0 || c === CodePoint.DIGIT_1, + ); this.endToken("Integer", "start", out, 2); return this.back("DATA"); } @@ -966,7 +916,10 @@ export class Tokenizer { const { minus, absInt } = this.data! as FractionalData; const { out, nextCp } = this.parseDigits(cp, isDigit); const absNum = `${absInt}.${out}`; - if (nextCp === LATIN_SMALL_E || nextCp === LATIN_CAPITAL_E) { + if ( + nextCp === CodePoint.LATIN_SMALL_E || + nextCp === CodePoint.LATIN_CAPITAL_E + ) { const data: ExponentData = { minus, left: absNum, @@ -982,8 +935,8 @@ export class Tokenizer { private EXPONENT_RIGHT(cp: number): TokenizerState { const { left, minus: leftMinus } = this.data! as ExponentData; let minus = false; - if (cp === DASH || cp === PLUS_SIGN) { - minus = cp === DASH; + if (cp === CodePoint.DASH || cp === CodePoint.PLUS_SIGN) { + minus = cp === CodePoint.DASH; cp = this.nextCode(); } const { out } = this.parseDigits(cp, isDigit); @@ -997,25 +950,25 @@ export class Tokenizer { } private BOOLEAN(cp: number): TokenizerState { - if (cp === LATIN_SMALL_T) { + if (cp === CodePoint.LATIN_SMALL_T) { const codePoints = this.codePointIterator.subCodePoints(); if ( - codePoints.next() === LATIN_SMALL_R && - codePoints.next() === LATIN_SMALL_U && - codePoints.next() === LATIN_SMALL_E + codePoints.next() === CodePoint.LATIN_SMALL_R && + codePoints.next() === CodePoint.LATIN_SMALL_U && + codePoints.next() === CodePoint.LATIN_SMALL_E ) { // true this.skip(codePoints.count); this.endToken("Boolean", "end", true); return "DATA"; } - } else if (cp === LATIN_SMALL_F) { + } else if (cp === CodePoint.LATIN_SMALL_F) { const codePoints = this.codePointIterator.subCodePoints(); if ( - codePoints.next() === LATIN_SMALL_A && - codePoints.next() === LATIN_SMALL_L && - codePoints.next() === LATIN_SMALL_S && - codePoints.next() === LATIN_SMALL_E + codePoints.next() === CodePoint.LATIN_SMALL_A && + codePoints.next() === CodePoint.LATIN_SMALL_L && + codePoints.next() === CodePoint.LATIN_SMALL_S && + codePoints.next() === CodePoint.LATIN_SMALL_E ) { // false this.skip(codePoints.count); @@ -1046,7 +999,7 @@ export class Tokenizer { return this.reportParseError("unexpected-char"); } cp = this.nextCode(); - if (cp !== DASH) { + if (cp !== CodePoint.DASH) { return this.reportParseError("unexpected-char"); } const end = this.codePointIterator.start.offset; @@ -1072,10 +1025,10 @@ export class Tokenizer { } cp = this.nextCode(); - if (cp === LATIN_CAPITAL_T || cp === LATIN_SMALL_T) { + if (cp === CodePoint.LATIN_CAPITAL_T || cp === CodePoint.LATIN_SMALL_T) { return "TIME_HOUR"; } - if (cp === SPACE) { + if (cp === CodePoint.SPACE) { const subCodePoints = this.codePointIterator.subCodePoints(); if (isDigit(subCodePoints.next()) && isDigit(subCodePoints.next())) { return "TIME_HOUR"; @@ -1096,7 +1049,7 @@ export class Tokenizer { return this.reportParseError("unexpected-char"); } cp = this.nextCode(); - if (cp !== COLON) { + if (cp !== CodePoint.COLON) { return this.reportParseError("unexpected-char"); } const end = this.codePointIterator.start.offset; @@ -1118,7 +1071,7 @@ export class Tokenizer { const data: DateTimeData = this.data! as DateTimeData; data.minute = Number(this.text.slice(start, end)); cp = this.nextCode(); - if (cp === COLON) { + if (cp === CodePoint.COLON) { return "TIME_SECOND"; } if (this.tomlVersion.lt(1, 1)) { @@ -1149,7 +1102,7 @@ export class Tokenizer { } cp = this.nextCode(); - if (cp === DOT) { + if (cp === CodePoint.DOT) { return "TIME_SEC_FRAC"; } return this.processTimeEnd(cp, data); @@ -1171,11 +1124,11 @@ export class Tokenizer { private processTimeEnd(cp: number, data: DateTimeData): TokenizerState { if (data.hasDate) { - if (cp === DASH || cp === PLUS_SIGN) { + if (cp === CodePoint.DASH || cp === CodePoint.PLUS_SIGN) { data.offsetSign = cp; return "TIME_OFFSET"; } - if (cp === LATIN_CAPITAL_Z || cp === LATIN_SMALL_Z) { + if (cp === CodePoint.LATIN_CAPITAL_Z || cp === CodePoint.LATIN_SMALL_Z) { const dateValue = getDateFromDateTimeData(data, "Z"); this.endToken("OffsetDateTime", "end", dateValue); return "DATA"; @@ -1199,7 +1152,7 @@ export class Tokenizer { return this.reportParseError("unexpected-char"); } cp = this.nextCode(); - if (cp !== COLON) { + if (cp !== CodePoint.COLON) { return this.reportParseError("unexpected-char"); } const hourEnd = this.codePointIterator.start.offset; @@ -1239,19 +1192,19 @@ export class Tokenizer { nextCp: number; hasUnderscore: boolean; } { - if (cp === UNDERSCORE) { + if (cp === CodePoint.UNDERSCORE) { return this.reportParseError("invalid-underscore"); } if (!checkDigit(cp)) { return this.reportParseError("unexpected-char"); } let out = ""; - let before = NULL; + let before = CodePoint.NULL; let hasUnderscore = false; - while (checkDigit(cp) || cp === UNDERSCORE) { - if (cp === UNDERSCORE) { + while (checkDigit(cp) || cp === CodePoint.UNDERSCORE) { + if (cp === CodePoint.UNDERSCORE) { hasUnderscore = true; - if (before === UNDERSCORE) { + if (before === CodePoint.UNDERSCORE) { return this.reportParseError("invalid-underscore"); } } else { @@ -1260,7 +1213,7 @@ export class Tokenizer { before = cp; cp = this.nextCode(); } - if (before === UNDERSCORE) { + if (before === CodePoint.UNDERSCORE) { return this.reportParseError("invalid-underscore"); } return { @@ -1297,7 +1250,7 @@ export class Tokenizer { } private currChar(cp: number): string { - if (cp === LINE_FEED) return "\n"; + if (cp === CodePoint.LINE_FEED) return "\n"; if (cp < 0x10000) return this.text[this.codePointIterator.start.offset]; return this.text.slice( this.codePointIterator.start.offset, @@ -1311,7 +1264,12 @@ export class Tokenizer { */ function isUnquotedKeyChar(cp: number, tomlVersion: TOMLVer): boolean { // unquoted-key-char = ALPHA / DIGIT / %x2D / %x5F ; a-z A-Z 0-9 - _ - if (isLetter(cp) || isDigit(cp) || cp === UNDERSCORE || cp === DASH) { + if ( + isLetter(cp) || + isDigit(cp) || + cp === CodePoint.UNDERSCORE || + cp === CodePoint.DASH + ) { return true; } if (tomlVersion.lt(1, 1)) { @@ -1323,60 +1281,65 @@ function isUnquotedKeyChar(cp: number, tomlVersion: TOMLVer): boolean { // Other unquoted-key-char // Added in TOML 1.1 if ( - cp === SUPERSCRIPT_TWO || - cp === SUPERSCRIPT_THREE || - cp === SUPERSCRIPT_ONE || - (VULGAR_FRACTION_ONE_QUARTER <= cp && cp <= VULGAR_FRACTION_THREE_QUARTERS) + cp === CodePoint.SUPERSCRIPT_TWO || + cp === CodePoint.SUPERSCRIPT_THREE || + cp === CodePoint.SUPERSCRIPT_ONE || + (CodePoint.VULGAR_FRACTION_ONE_QUARTER <= cp && + cp <= CodePoint.VULGAR_FRACTION_THREE_QUARTERS) ) { // unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fractions return true; } if ( - (LATIN_CAPITAL_LETTER_A_WITH_GRAVE <= cp && - cp <= LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS) || - (LATIN_CAPITAL_LETTER_O_WITH_STROKE <= cp && - cp <= LATIN_SMALL_LETTER_O_WITH_DIAERESIS) || - (LATIN_SMALL_LETTER_O_WITH_STROKE <= cp && - cp <= GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL) + (CodePoint.LATIN_CAPITAL_LETTER_A_WITH_GRAVE <= cp && + cp <= CodePoint.LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS) || + (CodePoint.LATIN_CAPITAL_LETTER_O_WITH_STROKE <= cp && + cp <= CodePoint.LATIN_SMALL_LETTER_O_WITH_DIAERESIS) || + (CodePoint.LATIN_SMALL_LETTER_O_WITH_STROKE <= cp && + cp <= CodePoint.GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL) ) { // unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block return true; } - if (GREEK_CAPITAL_LETTER_YOT <= cp && cp <= CP_1FFF) { + if (CodePoint.GREEK_CAPITAL_LETTER_YOT <= cp && cp <= CodePoint.CP_1FFF) { // unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon return true; } if ( - (ZERO_WIDTH_NON_JOINER <= cp && cp <= ZERO_WIDTH_JOINER) || - (UNDERTIE <= cp && cp <= CHARACTER_TIE) + (CodePoint.ZERO_WIDTH_NON_JOINER <= cp && + cp <= CodePoint.ZERO_WIDTH_JOINER) || + (CodePoint.UNDERTIE <= cp && cp <= CodePoint.CHARACTER_TIE) ) { // unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ return true; } if ( - (SUPERSCRIPT_ZERO <= cp && cp <= CP_218F) || - (CIRCLED_DIGIT_ONE <= cp && cp <= NEGATIVE_CIRCLED_DIGIT_ZERO) + (CodePoint.SUPERSCRIPT_ZERO <= cp && cp <= CodePoint.CP_218F) || + (CodePoint.CIRCLED_DIGIT_ONE <= cp && + cp <= CodePoint.NEGATIVE_CIRCLED_DIGIT_ZERO) ) { // unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics return true; } if ( - (GLAGOLITIC_CAPITAL_LETTER_AZU <= cp && cp <= CP_2FEF) || - (IDEOGRAPHIC_COMMA <= cp && cp <= CP_D7FF) + (CodePoint.GLAGOLITIC_CAPITAL_LETTER_AZU <= cp && + cp <= CodePoint.CP_2FEF) || + (CodePoint.IDEOGRAPHIC_COMMA <= cp && cp <= CodePoint.CP_D7FF) ) { // unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces return true; } if ( - (CJK_COMPATIBILITY_IDEOGRAPH_F900 <= cp && - cp <= ARABIC_LIGATURE_SALAAMUHU_ALAYNAA) || - (ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM <= cp && - cp <= REPLACEMENT_CHARACTER) + (CodePoint.CJK_COMPATIBILITY_IDEOGRAPH_F900 <= cp && + cp <= CodePoint.ARABIC_LIGATURE_SALAAMUHU_ALAYNAA) || + (CodePoint.ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM <= + cp && + cp <= CodePoint.REPLACEMENT_CHARACTER) ) { // unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) return true; } - if (LINEAR_B_SYLLABLE_B008_A <= cp && cp <= CP_EFFFF) { + if (CodePoint.LINEAR_B_SYLLABLE_B008_A <= cp && cp <= CodePoint.CP_EFFFF) { // unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) return true; } @@ -1388,7 +1351,9 @@ function isUnquotedKeyChar(cp: number, tomlVersion: TOMLVer): boolean { * Check whether the code point is control character other than tab */ function isControlOtherThanTab(cp: number): boolean { - return (isControl(cp) && cp !== TABULATION) || cp === DELETE; + return ( + (isControl(cp) && cp !== CodePoint.TABULATION) || cp === CodePoint.DELETE + ); } /** @@ -1397,8 +1362,8 @@ function isControlOtherThanTab(cp: number): boolean { function isAllowedCommentCharacter(cp: number): boolean { // allowed-comment-char = %x01-09 / %x0E-7F / non-ascii return ( - (SOH <= cp && cp <= TABULATION) || - (SO <= cp && cp <= DELETE) || + (CodePoint.SOH <= cp && cp <= CodePoint.TABULATION) || + (CodePoint.SO <= cp && cp <= CodePoint.DELETE) || isNonAscii(cp) ); } @@ -1408,7 +1373,10 @@ function isAllowedCommentCharacter(cp: number): boolean { */ function isNonAscii(cp: number): boolean { // %x80-D7FF / %xE000-10FFFF - return (PAD <= cp && cp <= CP_D7FF) || (CP_E000 <= cp && cp <= CP_10FFFF); + return ( + (CodePoint.PAD <= cp && cp <= CodePoint.CP_D7FF) || + (CodePoint.CP_E000 <= cp && cp <= CodePoint.CP_10FFFF) + ); } /** From f88bc0e3006f889d31b3d75789cf5994af990d70 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Thu, 9 Nov 2023 08:11:26 +0900 Subject: [PATCH 2/5] update --- .github/workflows/NewOldBenchmark.yml | 20 ++++++++++++++++++++ package.json | 1 + 2 files changed, 21 insertions(+) create mode 100644 .github/workflows/NewOldBenchmark.yml diff --git a/.github/workflows/NewOldBenchmark.yml b/.github/workflows/NewOldBenchmark.yml new file mode 100644 index 0000000..5a73843 --- /dev/null +++ b/.github/workflows/NewOldBenchmark.yml @@ -0,0 +1,20 @@ +name: NewOldBenchmark + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + - name: Install Packages + run: npm install -f + - name: Build + run: npm run build + - name: Benchmark + run: npm run benchmark diff --git a/package.json b/package.json index c6d0b23..0e69fa0 100644 --- a/package.json +++ b/package.json @@ -80,6 +80,7 @@ "nyc": "^15.1.0", "prettier": "^3.0.0", "semver": "^7.3.4", + "toml-eslint-parser": "^0.8.0", "ts-node": "^10.0.0", "typescript": "~5.0.0", "vue-eslint-parser": "^9.0.0" From ff3fb049ab9ae8421cf2bead06f17172c3a3366d Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Thu, 9 Nov 2023 08:13:02 +0900 Subject: [PATCH 3/5] fix --- .github/workflows/NewOldBenchmark.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/NewOldBenchmark.yml b/.github/workflows/NewOldBenchmark.yml index 5a73843..1e1bbd5 100644 --- a/.github/workflows/NewOldBenchmark.yml +++ b/.github/workflows/NewOldBenchmark.yml @@ -12,6 +12,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 + - name: Setup + run: npm run setup - name: Install Packages run: npm install -f - name: Build From 4e06b587073b6595396ba3ecb04006d48b0cd372 Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Thu, 9 Nov 2023 08:15:13 +0900 Subject: [PATCH 4/5] Create slow-chefs-study.md --- .changeset/slow-chefs-study.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/slow-chefs-study.md diff --git a/.changeset/slow-chefs-study.md b/.changeset/slow-chefs-study.md new file mode 100644 index 0000000..267fc5e --- /dev/null +++ b/.changeset/slow-chefs-study.md @@ -0,0 +1,5 @@ +--- +"toml-eslint-parser": patch +--- + +improve performance From 4208459f55e4abec37c44f7d663ff21916d92d8f Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Thu, 9 Nov 2023 08:21:21 +0900 Subject: [PATCH 5/5] fix --- .eslintrc.js | 2 ++ src/tokenizer/code-point.ts | 1 - src/toml-parser/keys-resolver.ts | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.eslintrc.js b/.eslintrc.js index 756f83b..847c2ec 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -22,6 +22,7 @@ module.exports = { "no-warning-comments": "warn", "no-lonely-if": "off", "no-param-reassign": "off", + "no-shadow": "off", }, overrides: [ { @@ -32,6 +33,7 @@ module.exports = { project: "./tsconfig.json", }, rules: { + "@typescript-eslint/no-shadow": "error", "@typescript-eslint/naming-convention": [ "error", { diff --git a/src/tokenizer/code-point.ts b/src/tokenizer/code-point.ts index 6a4c1a2..e199a68 100644 --- a/src/tokenizer/code-point.ts +++ b/src/tokenizer/code-point.ts @@ -1,4 +1,3 @@ -// eslint-disable-next-line no-shadow -- bug? export const enum CodePoint { EOF = -1, NULL = 0x00, diff --git a/src/toml-parser/keys-resolver.ts b/src/toml-parser/keys-resolver.ts index a62707e..0565860 100644 --- a/src/toml-parser/keys-resolver.ts +++ b/src/toml-parser/keys-resolver.ts @@ -11,7 +11,6 @@ import type { import { last, toKeyName } from "../internal-utils"; import type { Context } from "./context"; -// eslint-disable-next-line no-shadow -- ?? const enum ValueKind { VALUE, INTERMEDIATE,