Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve performance #166

Merged
merged 5 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/slow-chefs-study.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"toml-eslint-parser": patch
---

improve performance
2 changes: 2 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ module.exports = {
"no-warning-comments": "warn",
"no-lonely-if": "off",
"no-param-reassign": "off",
"no-shadow": "off",
},
overrides: [
{
Expand All @@ -32,6 +33,7 @@ module.exports = {
project: "./tsconfig.json",
},
rules: {
"@typescript-eslint/no-shadow": "error",
"@typescript-eslint/naming-convention": [
"error",
{
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/NewOldBenchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Workflow that builds the package and runs the `benchmark` npm script.
name: NewOldBenchmark

# Triggered by pushes to `main` and by pull requests targeting `main`.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # NOTE(review): no `node-version` is given, so the runner's default Node.js is used — confirm this is intended.
      - uses: actions/setup-node@v4
      - name: Setup
        run: npm run setup
      - name: Install Packages
        run: npm install -f
      - name: Build
        run: npm run build
      - name: Benchmark
        run: npm run benchmark
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
"nyc": "^15.1.0",
"prettier": "^3.0.0",
"semver": "^7.3.4",
"toml-eslint-parser": "^0.8.0",
"ts-node": "^10.0.0",
"typescript": "~5.0.0",
"vue-eslint-parser": "^9.0.0"
Expand Down
36 changes: 18 additions & 18 deletions src/tokenizer/code-point-iterator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { NULL, EOF, LINE_FEED, CARRIAGE_RETURN } from "./code-point";
import { CodePoint } from "./code-point";

type Position = {
offset: number;
Expand All @@ -9,7 +9,7 @@ type Position = {
export class CodePointIterator {
public readonly text: string;

private lastCodePoint: number = NULL;
private lastCodePoint: number = CodePoint.NULL;

public start: Position = {
offset: -1,
Expand All @@ -31,31 +31,31 @@ export class CodePointIterator {
}

public next(): number {
if (this.lastCodePoint === EOF) {
return EOF;
if (this.lastCodePoint === CodePoint.EOF) {
return CodePoint.EOF;
}

this.start.offset = this.end.offset;
this.start.line = this.end.line;
this.start.column = this.end.column;

const cp = this.text.codePointAt(this.start.offset) ?? EOF;
if (cp === EOF) {
const cp = this.text.codePointAt(this.start.offset) ?? CodePoint.EOF;
if (cp === CodePoint.EOF) {
this.end = this.start;
return (this.lastCodePoint = cp);
}
const shift = cp >= 0x10000 ? 2 : 1;
this.end.offset = this.start.offset + shift;
if (cp === LINE_FEED) {
if (cp === CodePoint.LINE_FEED) {
this.end.line = this.start.line + 1;
this.end.column = 0;
} else if (cp === CARRIAGE_RETURN) {
if (this.text.codePointAt(this.end.offset) === LINE_FEED) {
} else if (cp === CodePoint.CARRIAGE_RETURN) {
if (this.text.codePointAt(this.end.offset) === CodePoint.LINE_FEED) {
this.end.offset++;
this.end.line = this.start.line + 1;
this.end.column = 0;
}
return (this.lastCodePoint = LINE_FEED);
return (this.lastCodePoint = CodePoint.LINE_FEED);
} else {
this.end.column = this.start.column + shift;
}
Expand All @@ -66,15 +66,15 @@ export class CodePointIterator {
public *iterateSubCodePoints(): IterableIterator<number> {
let index = this.end.offset;
while (true) {
let cp = this.text.codePointAt(index) ?? EOF;
if (cp === CARRIAGE_RETURN) {
if (this.text.codePointAt(index) === LINE_FEED) {
cp = this.text.codePointAt(++index) ?? EOF;
let cp = this.text.codePointAt(index) ?? CodePoint.EOF;
if (cp === CodePoint.CARRIAGE_RETURN) {
if (this.text.codePointAt(index) === CodePoint.LINE_FEED) {
cp = this.text.codePointAt(++index) ?? CodePoint.EOF;
} else {
cp = LINE_FEED;
cp = CodePoint.LINE_FEED;
}
}
if (cp === EOF) {
if (cp === CodePoint.EOF) {
return;
}
yield cp;
Expand All @@ -92,12 +92,12 @@ export class CodePointIterator {
return {
next() {
if (end) {
return EOF;
return CodePoint.EOF;
}
const r = sub.next();
if (r.done) {
end = true;
return EOF;
return CodePoint.EOF;
}
count++;
return r.value;
Expand Down
196 changes: 99 additions & 97 deletions src/tokenizer/code-point.ts
Original file line number Diff line number Diff line change
@@ -1,125 +1,127 @@
// Individually exported numeric code point constants.
// NOTE(review): every name/value here also appears as a member of the
// `CodePoint` const enum in this file, and the helper functions reference the
// enum form; these look like the pre-enum declarations — confirm before use.
export const EOF = -1;
export const NULL = 0x00;
export const SOH = 0x01;
export const BACKSPACE = 0x08;
export const TABULATION = 0x09;
export const LINE_FEED = 0x0a;
export const FORM_FEED = 0x0c;
export const CARRIAGE_RETURN = 0x0d;
export const ESCAPE = 0x1b;
export const SO = 0x0e;
export const US = 0x1f;
export const SPACE = 0x20;
export const QUOTATION_MARK = 0x22;
export const HASH = 0x23;
export const SINGLE_QUOTE = 0x27;
export const PLUS_SIGN = 0x2b;
export const COMMA = 0x2c;
export const DASH = 0x2d;
export const DOT = 0x2e;
export const DIGIT_0 = 0x30;
export const DIGIT_1 = 0x31;
export const DIGIT_2 = 0x32;
export const DIGIT_3 = 0x33;
export const DIGIT_7 = 0x37;
export const DIGIT_9 = 0x39;
export const COLON = 0x3a;
export const EQUALS_SIGN = 0x3d;
export const LATIN_CAPITAL_A = 0x41;
export const LATIN_CAPITAL_E = 0x45;
export const LATIN_CAPITAL_F = 0x46;
export const LATIN_CAPITAL_T = 0x54;
export const LATIN_CAPITAL_U = 0x55;
export const LATIN_CAPITAL_Z = 0x5a;
export const LEFT_BRACKET = 0x5b; // [
export const BACKSLASH = 0x5c;
export const RIGHT_BRACKET = 0x5d; // ]
export const UNDERSCORE = 0x5f;
export const LATIN_SMALL_A = 0x61;
export const LATIN_SMALL_B = 0x62;
export const LATIN_SMALL_E = 0x65;
export const LATIN_SMALL_F = 0x66;
export const LATIN_SMALL_I = 0x69;
export const LATIN_SMALL_L = 0x6c;
export const LATIN_SMALL_N = 0x6e;
export const LATIN_SMALL_O = 0x6f;
export const LATIN_SMALL_R = 0x72;
export const LATIN_SMALL_S = 0x73;
export const LATIN_SMALL_T = 0x74;
export const LATIN_SMALL_U = 0x75;
export const LATIN_SMALL_X = 0x78;
export const LATIN_SMALL_Z = 0x7a;
export const LEFT_BRACE = 0x7b; // {
export const RIGHT_BRACE = 0x7d; // }
export const DELETE = 0x7f;
export const PAD = 0x80;
export const SUPERSCRIPT_TWO = 0xb2;
export const SUPERSCRIPT_THREE = 0xb3;
export const SUPERSCRIPT_ONE = 0xb9;
export const VULGAR_FRACTION_ONE_QUARTER = 0xbc;
export const VULGAR_FRACTION_THREE_QUARTERS = 0xbe;
export const LATIN_CAPITAL_LETTER_A_WITH_GRAVE = 0xc0;
export const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = 0xd6;
export const LATIN_CAPITAL_LETTER_O_WITH_STROKE = 0xd8;
export const LATIN_SMALL_LETTER_O_WITH_DIAERESIS = 0xf6;
export const LATIN_SMALL_LETTER_O_WITH_STROKE = 0xf8;
export const GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL = 0x37b;
export const GREEK_CAPITAL_LETTER_YOT = 0x37f;
export const CP_1FFF = 0x1fff;
export const ZERO_WIDTH_NON_JOINER = 0x200c;
export const ZERO_WIDTH_JOINER = 0x200d;
export const UNDERTIE = 0x203f;
export const CHARACTER_TIE = 0x2040;
export const SUPERSCRIPT_ZERO = 0x2070;
export const CP_218F = 0x218f;
export const CIRCLED_DIGIT_ONE = 0x2460;
export const NEGATIVE_CIRCLED_DIGIT_ZERO = 0x24ff;
export const GLAGOLITIC_CAPITAL_LETTER_AZU = 0x2c00;
export const CP_2FEF = 0x2fef;
export const IDEOGRAPHIC_COMMA = 0x3001;
export const CP_D7FF = 0xd7ff;
export const CP_E000 = 0xe000;
export const CJK_COMPATIBILITY_IDEOGRAPH_F900 = 0xf900;
export const ARABIC_LIGATURE_SALAAMUHU_ALAYNAA = 0xfdcf;
export const ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM = 0xfdf0;
export const REPLACEMENT_CHARACTER = 0xfffd;
export const LINEAR_B_SYLLABLE_B008_A = 0x10000;
export const CP_EFFFF = 0xeffff;
export const CP_10FFFF = 0x10ffff;
/**
 * Named numeric code point values referenced by the tokenizer helpers.
 *
 * Declared as a `const enum`, so TypeScript substitutes each member reference
 * with its literal number at compile time (see the TypeScript handbook on
 * const enums).
 */
export const enum CodePoint {
// Sentinel, not a real code point: the iterator falls back to it when
// `String.prototype.codePointAt` yields `undefined`.
EOF = -1,
NULL = 0x00,
SOH = 0x01,
BACKSPACE = 0x08,
TABULATION = 0x09,
LINE_FEED = 0x0a,
FORM_FEED = 0x0c,
CARRIAGE_RETURN = 0x0d,
// Listed out of numeric order: 0x1b sits between 0x0d and 0x0e here.
ESCAPE = 0x1b,
SO = 0x0e,
US = 0x1f,
SPACE = 0x20,
QUOTATION_MARK = 0x22,
HASH = 0x23,
SINGLE_QUOTE = 0x27,
PLUS_SIGN = 0x2b,
COMMA = 0x2c,
DASH = 0x2d,
DOT = 0x2e,
DIGIT_0 = 0x30,
DIGIT_1 = 0x31,
DIGIT_2 = 0x32,
DIGIT_3 = 0x33,
DIGIT_7 = 0x37,
DIGIT_9 = 0x39,
COLON = 0x3a,
EQUALS_SIGN = 0x3d,
LATIN_CAPITAL_A = 0x41,
LATIN_CAPITAL_E = 0x45,
LATIN_CAPITAL_F = 0x46,
LATIN_CAPITAL_T = 0x54,
LATIN_CAPITAL_U = 0x55,
LATIN_CAPITAL_Z = 0x5a,
LEFT_BRACKET = 0x5b, // [
BACKSLASH = 0x5c,
RIGHT_BRACKET = 0x5d, // ]
UNDERSCORE = 0x5f,
LATIN_SMALL_A = 0x61,
LATIN_SMALL_B = 0x62,
LATIN_SMALL_E = 0x65,
LATIN_SMALL_F = 0x66,
LATIN_SMALL_I = 0x69,
LATIN_SMALL_L = 0x6c,
LATIN_SMALL_N = 0x6e,
LATIN_SMALL_O = 0x6f,
LATIN_SMALL_R = 0x72,
LATIN_SMALL_S = 0x73,
LATIN_SMALL_T = 0x74,
LATIN_SMALL_U = 0x75,
LATIN_SMALL_X = 0x78,
LATIN_SMALL_Z = 0x7a,
LEFT_BRACE = 0x7b, // {
RIGHT_BRACE = 0x7d, // }
DELETE = 0x7f,
PAD = 0x80,
SUPERSCRIPT_TWO = 0xb2,
SUPERSCRIPT_THREE = 0xb3,
SUPERSCRIPT_ONE = 0xb9,
VULGAR_FRACTION_ONE_QUARTER = 0xbc,
VULGAR_FRACTION_THREE_QUARTERS = 0xbe,
LATIN_CAPITAL_LETTER_A_WITH_GRAVE = 0xc0,
LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = 0xd6,
LATIN_CAPITAL_LETTER_O_WITH_STROKE = 0xd8,
LATIN_SMALL_LETTER_O_WITH_DIAERESIS = 0xf6,
LATIN_SMALL_LETTER_O_WITH_STROKE = 0xf8,
GREEK_SMALL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL = 0x37b,
GREEK_CAPITAL_LETTER_YOT = 0x37f,
CP_1FFF = 0x1fff,
ZERO_WIDTH_NON_JOINER = 0x200c,
ZERO_WIDTH_JOINER = 0x200d,
UNDERTIE = 0x203f,
CHARACTER_TIE = 0x2040,
SUPERSCRIPT_ZERO = 0x2070,
CP_218F = 0x218f,
CIRCLED_DIGIT_ONE = 0x2460,
NEGATIVE_CIRCLED_DIGIT_ZERO = 0x24ff,
GLAGOLITIC_CAPITAL_LETTER_AZU = 0x2c00,
CP_2FEF = 0x2fef,
IDEOGRAPHIC_COMMA = 0x3001,
CP_D7FF = 0xd7ff,
CP_E000 = 0xe000,
CJK_COMPATIBILITY_IDEOGRAPH_F900 = 0xf900,
ARABIC_LIGATURE_SALAAMUHU_ALAYNAA = 0xfdcf,
ARABIC_LIGATURE_SALLA_USED_AS_KORANIC_STOP_SIGN_ISOLATED_FORM = 0xfdf0,
REPLACEMENT_CHARACTER = 0xfffd,
LINEAR_B_SYLLABLE_B008_A = 0x10000,
CP_EFFFF = 0xeffff,
CP_10FFFF = 0x10ffff,
}

/**
 * Check whether the code point is a control character in the range
 * U+0000–U+001F.
 *
 * U+007F (`CodePoint.DELETE`) lies outside this range and is not matched, and
 * neither is the `CodePoint.EOF` sentinel (-1).
 *
 * @param cp - Code point to test.
 * @returns `true` when `CodePoint.NULL <= cp <= CodePoint.US`.
 */
export function isControl(cp: number): boolean {
  return cp >= CodePoint.NULL && cp <= CodePoint.US;
}

/**
 * Check whether the code point is a whitespace character, i.e. a horizontal
 * tab (U+0009) or a space (U+0020).
 *
 * @param cp - Code point to test.
 * @returns `true` for `CodePoint.TABULATION` or `CodePoint.SPACE`.
 */
export function isWhitespace(cp: number): boolean {
  return cp === CodePoint.TABULATION || cp === CodePoint.SPACE;
}

/**
 * Check whether the code point is an end-of-line character, i.e. a line feed
 * (U+000A) or a carriage return (U+000D).
 *
 * @param cp - Code point to test.
 * @returns `true` for `CodePoint.LINE_FEED` or `CodePoint.CARRIAGE_RETURN`.
 */
export function isEOL(cp: number): boolean {
  return cp === CodePoint.LINE_FEED || cp === CodePoint.CARRIAGE_RETURN;
}

/**
 * Check whether the code point is an uppercase Latin letter `A`–`Z`
 * (U+0041–U+005A).
 *
 * @param cp - Code point to test.
 * @returns `true` when `cp` is within the inclusive `A`–`Z` range.
 */
function isUpperLetter(cp: number): boolean {
  return cp >= CodePoint.LATIN_CAPITAL_A && cp <= CodePoint.LATIN_CAPITAL_Z;
}

/**
 * Check whether the code point is a lowercase Latin letter `a`–`z`
 * (U+0061–U+007A).
 *
 * @param cp - Code point to test.
 * @returns `true` when `cp` is within the inclusive `a`–`z` range.
 */
function isLowerLetter(cp: number): boolean {
  return cp >= CodePoint.LATIN_SMALL_A && cp <= CodePoint.LATIN_SMALL_Z;
}

/**
Expand All @@ -133,7 +135,7 @@ export function isLetter(cp: number): boolean {
/**
 * Check whether the code point is a decimal digit `0`–`9` (U+0030–U+0039).
 *
 * @param cp - Code point to test.
 * @returns `true` when `cp` is within the inclusive `0`–`9` range.
 */
export function isDigit(cp: number): boolean {
  return cp >= CodePoint.DIGIT_0 && cp <= CodePoint.DIGIT_9;
}

/**
Expand All @@ -142,15 +144,15 @@ export function isDigit(cp: number): boolean {
/**
 * Check whether the code point is a hexadecimal digit: `0`–`9`, `a`–`f` or
 * `A`–`F`.
 *
 * @param cp - Code point to test.
 * @returns `true` when `cp` is a decimal digit or a letter in either case
 *   between A and F inclusive.
 */
export function isHexDig(cp: number): boolean {
  return (
    isDigit(cp) ||
    (cp >= CodePoint.LATIN_SMALL_A && cp <= CodePoint.LATIN_SMALL_F) ||
    (cp >= CodePoint.LATIN_CAPITAL_A && cp <= CodePoint.LATIN_CAPITAL_F)
  );
}
/**
 * Check whether the code point is an octal digit `0`–`7` (U+0030–U+0037).
 *
 * @param cp - Code point to test.
 * @returns `true` when `cp` is within the inclusive `0`–`7` range.
 */
export function isOctalDig(cp: number): boolean {
  return cp >= CodePoint.DIGIT_0 && cp <= CodePoint.DIGIT_7;
}

/**
Expand Down
Loading
Loading