From 6370cc89df7afdc212dbbbf9831c2069560f6480 Mon Sep 17 00:00:00 2001 From: Mike Lischke Date: Tue, 23 Jan 2024 19:57:54 +0100 Subject: [PATCH] Runtime API tests done --- runtime-testsuite/api.spec.ts | 34 ++- .../{tests => api}/TestCodePointCharStream.ts | 0 .../src/api/TestInterpreterDataReader.ts | 47 ++++ .../src/{tests => }/api/TestTokenStream.ts | 8 +- .../api/TestTokenStreamRewriter.ts | 10 +- .../src/{tests => }/api/TestVisitors.ts | 16 +- .../src/{tests => }/api/perf/Parser.java | 0 .../src/{tests => }/api/perf/RuleContext.java | 0 .../{tests => }/api/perf/TimeLexerSpeed.ts | 4 +- .../src/{tests => }/api/perf/emoji.txt | 0 .../src/{tests => }/api/perf/udhr_hin.txt | 0 .../src/{tests => }/api/perf/udhr_kor.txt | 0 runtime-testsuite/src/helpers/Character.ts | 4 + runtime-testsuite/src/helpers/package_js.json | 1 - runtime-testsuite/src/helpers/package_ts.json | 9 - .../src/tests/TestInterpreterDataReader.ts | 57 ---- runtime-testsuite/src/tests/TsNodeRunner.ts | 88 ------ .../src/tests/TypeScriptRuntimeTests.ts | 17 -- tool/src/org/antlr/v4/automata/ATNPrinter.ts | 136 ++++++++++ tool/src/org/antlr/v4/misc/CharSupport.ts | 251 ++++++++++++++++++ tool/src/org/antlr/v4/misc/helpers.ts | 39 +++ 21 files changed, 524 insertions(+), 197 deletions(-) rename runtime-testsuite/src/{tests => api}/TestCodePointCharStream.ts (100%) create mode 100644 runtime-testsuite/src/api/TestInterpreterDataReader.ts rename runtime-testsuite/src/{tests => }/api/TestTokenStream.ts (82%) rename runtime-testsuite/src/{tests => }/api/TestTokenStreamRewriter.ts (98%) rename runtime-testsuite/src/{tests => }/api/TestVisitors.ts (93%) rename runtime-testsuite/src/{tests => }/api/perf/Parser.java (100%) rename runtime-testsuite/src/{tests => }/api/perf/RuleContext.java (100%) rename runtime-testsuite/src/{tests => }/api/perf/TimeLexerSpeed.ts (98%) rename runtime-testsuite/src/{tests => }/api/perf/emoji.txt (100%) rename runtime-testsuite/src/{tests => }/api/perf/udhr_hin.txt (100%) 
rename runtime-testsuite/src/{tests => }/api/perf/udhr_kor.txt (100%) delete mode 100644 runtime-testsuite/src/helpers/package_js.json delete mode 100644 runtime-testsuite/src/helpers/package_ts.json delete mode 100644 runtime-testsuite/src/tests/TestInterpreterDataReader.ts delete mode 100644 runtime-testsuite/src/tests/TsNodeRunner.ts delete mode 100644 runtime-testsuite/src/tests/TypeScriptRuntimeTests.ts create mode 100644 tool/src/org/antlr/v4/automata/ATNPrinter.ts create mode 100644 tool/src/org/antlr/v4/misc/CharSupport.ts create mode 100644 tool/src/org/antlr/v4/misc/helpers.ts diff --git a/runtime-testsuite/api.spec.ts b/runtime-testsuite/api.spec.ts index 39d8291..e69ba15 100644 --- a/runtime-testsuite/api.spec.ts +++ b/runtime-testsuite/api.spec.ts @@ -3,12 +3,34 @@ * Licensed under the MIT License. See License.txt in the project root for license information. */ -import { TestTokenStreamRewriter } from "./src/tests/api/TestTokenStreamRewriter.js"; +import { TestCodePointCharStream } from "./src/api/TestCodePointCharStream.js"; +import { TestInterpreterDataReader } from "./src/api/TestInterpreterDataReader.js"; +import { TestTokenStream } from "./src/api/TestTokenStream.js"; +import { TestTokenStreamRewriter } from "./src/api/TestTokenStreamRewriter.js"; +import { TestVisitors } from "./src/api/TestVisitors.js"; import { TestNG } from "./utils/TestNG.js"; -describe("Direct API Tests", () => { - describe("TokenStreamRewriter", () => { - const testNG = new TestNG(); - testNG.run(TestTokenStreamRewriter); - }); +describe("TokenStreamRewriter", () => { + const testNG = new TestNG(); + testNG.run(TestTokenStreamRewriter); +}); + +describe("TestTokenStream", () => { + const testNG = new TestNG(); + testNG.run(TestTokenStream); +}); + +describe("TestVisitors", () => { + const testNG = new TestNG(); + testNG.run(TestVisitors); +}); + +describe("TestCodePointCharStream", () => { + const testNG = new TestNG(); + testNG.run(TestCodePointCharStream); +}); + 
+describe("TestInterpreterDataReader", () => { + const testNG = new TestNG(); + testNG.run(TestInterpreterDataReader); }); diff --git a/runtime-testsuite/src/tests/TestCodePointCharStream.ts b/runtime-testsuite/src/api/TestCodePointCharStream.ts similarity index 100% rename from runtime-testsuite/src/tests/TestCodePointCharStream.ts rename to runtime-testsuite/src/api/TestCodePointCharStream.ts diff --git a/runtime-testsuite/src/api/TestInterpreterDataReader.ts b/runtime-testsuite/src/api/TestInterpreterDataReader.ts new file mode 100644 index 0000000..fe69294 --- /dev/null +++ b/runtime-testsuite/src/api/TestInterpreterDataReader.ts @@ -0,0 +1,47 @@ +/* java2ts: keep */ + +/* + * Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import fs from "fs"; + +import { InterpreterDataReader } from "antlr4ng"; + +import { Test } from "../../utils/decorators.js"; +import { assertEquals } from "../../utils/junit.js"; + +/** + * This file represents a simple sanity checks on the parsing of the .interp file + * available to the Java runtime for interpreting rather than compiling and executing parsers. 
+ */ +export class TestInterpreterDataReader { + @Test + public testLexerFile(): void { + const content = fs.readFileSync("runtime-testsuite/generated/VisitorCalcLexer.interp", "utf-8"); + + const interpreterData = InterpreterDataReader.parseInterpreterData(content); + + assertEquals(6, interpreterData.vocabulary.getMaxTokenType()); + assertEquals(["INT", "MUL", "DIV", "ADD", "SUB", "WS"], interpreterData.ruleNames); + assertEquals([null, null, "'*'", "'/'", "'+'", "'-'", null], interpreterData.vocabulary.getLiteralNames()); + assertEquals([null, "INT", "MUL", "DIV", "ADD", "SUB", "WS"], interpreterData.vocabulary.getSymbolicNames()); + assertEquals(["DEFAULT_TOKEN_CHANNEL", "HIDDEN"], interpreterData.channels); + assertEquals(["DEFAULT_MODE"], interpreterData.modes); + } + + @Test + public testParserFile(): void { + const content = fs.readFileSync("runtime-testsuite/generated/VisitorCalc.interp", "utf-8"); + + const interpreterData = InterpreterDataReader.parseInterpreterData(content); + + assertEquals(6, interpreterData.vocabulary.getMaxTokenType()); + assertEquals(["s", "expr"], interpreterData.ruleNames); + assertEquals([null, null, "'*'", "'/'", "'+'", "'-'", null], interpreterData.vocabulary.getLiteralNames()); + assertEquals([null, "INT", "MUL", "DIV", "ADD", "SUB", "WS"], interpreterData.vocabulary.getSymbolicNames()); + } + +} diff --git a/runtime-testsuite/src/tests/api/TestTokenStream.ts b/runtime-testsuite/src/api/TestTokenStream.ts similarity index 82% rename from runtime-testsuite/src/tests/api/TestTokenStream.ts rename to runtime-testsuite/src/api/TestTokenStream.ts index 64d295d..6d9df39 100644 --- a/runtime-testsuite/src/tests/api/TestTokenStream.ts +++ b/runtime-testsuite/src/api/TestTokenStream.ts @@ -6,11 +6,11 @@ * can be found in the LICENSE.txt file in the project root. 
*/ -import { BufferedTokenStream, CharStreams, Token, type TokenStream } from "antlr4ng"; +import { BufferedTokenStream, CharStreams, Token } from "antlr4ng"; -import { Test } from "../../../utils/decorators.js"; -import { VisitorBasicLexer } from "../../../generated/VisitorBasicLexer.js"; -import { assertEquals } from "../../../utils/junit.js"; +import { Test } from "../../utils/decorators.js"; +import { VisitorBasicLexer } from "../../generated/VisitorBasicLexer.js"; +import { assertEquals } from "../../utils/junit.js"; /** * This class contains tests for specific API functionality in {@link TokenStream} and derived types. diff --git a/runtime-testsuite/src/tests/api/TestTokenStreamRewriter.ts b/runtime-testsuite/src/api/TestTokenStreamRewriter.ts similarity index 98% rename from runtime-testsuite/src/tests/api/TestTokenStreamRewriter.ts rename to runtime-testsuite/src/api/TestTokenStreamRewriter.ts index accba89..3067c7d 100644 --- a/runtime-testsuite/src/tests/api/TestTokenStreamRewriter.ts +++ b/runtime-testsuite/src/api/TestTokenStreamRewriter.ts @@ -10,12 +10,12 @@ import { CommonTokenStream, TokenStreamRewriter, Interval, CharStreams, Lexer, CharStream } from "antlr4ng"; -import { Test } from "../../../utils/decorators.js"; -import { assertEquals, assertNotNull } from "../../../utils/junit.js"; +import { Test } from "../../utils/decorators.js"; +import { assertEquals, assertNotNull } from "../../utils/junit.js"; -import { T1 } from "../../../generated/T1.js"; -import { T2 } from "../../../generated/T2.js"; -import { T3 } from "../../../generated/T3.js"; +import { T1 } from "../../generated/T1.js"; +import { T2 } from "../../generated/T2.js"; +import { T3 } from "../../generated/T3.js"; /** * @param lexerClass The lexer class to use. 
diff --git a/runtime-testsuite/src/tests/api/TestVisitors.ts b/runtime-testsuite/src/api/TestVisitors.ts similarity index 93% rename from runtime-testsuite/src/tests/api/TestVisitors.ts rename to runtime-testsuite/src/api/TestVisitors.ts index cab1f75..8337a32 100644 --- a/runtime-testsuite/src/tests/api/TestVisitors.ts +++ b/runtime-testsuite/src/api/TestVisitors.ts @@ -11,16 +11,16 @@ import { TerminalNode, CharStreams, Token, ATNSimulator, RuleContext, } from "antlr4ng"; -import { Test } from "../../../utils/decorators.js"; -import { VisitorBasicLexer } from "../../../generated/VisitorBasicLexer.js"; -import { VisitorBasicParser } from "../../../generated/VisitorBasicParser.js"; -import { VisitorBasicVisitor } from "../../../generated/VisitorBasicVisitor.js"; -import { assertEquals } from "../../../utils/junit.js"; -import { VisitorCalcLexer } from "../../../generated/VisitorCalcLexer.js"; +import { Test } from "../../utils/decorators.js"; +import { VisitorBasicLexer } from "../../generated/VisitorBasicLexer.js"; +import { VisitorBasicParser } from "../../generated/VisitorBasicParser.js"; +import { VisitorBasicVisitor } from "../../generated/VisitorBasicVisitor.js"; +import { assertEquals } from "../../utils/junit.js"; +import { VisitorCalcLexer } from "../../generated/VisitorCalcLexer.js"; import { AddContext, MultiplyContext, NumberContext, SContext, VisitorCalcParser, -} from "../../../generated/VisitorCalcParser.js"; -import { VisitorCalcVisitor } from "../../../generated/VisitorCalcVisitor.js"; +} from "../../generated/VisitorCalcParser.js"; +import { VisitorCalcVisitor } from "../../generated/VisitorCalcVisitor.js"; export class TestVisitors { diff --git a/runtime-testsuite/src/tests/api/perf/Parser.java b/runtime-testsuite/src/api/perf/Parser.java similarity index 100% rename from runtime-testsuite/src/tests/api/perf/Parser.java rename to runtime-testsuite/src/api/perf/Parser.java diff --git a/runtime-testsuite/src/tests/api/perf/RuleContext.java 
b/runtime-testsuite/src/api/perf/RuleContext.java similarity index 100% rename from runtime-testsuite/src/tests/api/perf/RuleContext.java rename to runtime-testsuite/src/api/perf/RuleContext.java diff --git a/runtime-testsuite/src/tests/api/perf/TimeLexerSpeed.ts b/runtime-testsuite/src/api/perf/TimeLexerSpeed.ts similarity index 98% rename from runtime-testsuite/src/tests/api/perf/TimeLexerSpeed.ts rename to runtime-testsuite/src/api/perf/TimeLexerSpeed.ts index 6788fe9..6f63454 100644 --- a/runtime-testsuite/src/tests/api/perf/TimeLexerSpeed.ts +++ b/runtime-testsuite/src/api/perf/TimeLexerSpeed.ts @@ -10,8 +10,8 @@ import { basename } from "path"; import { CharStream, CharStreams, CommonTokenStream, Lexer } from "antlr4ng"; import { printf } from "fast-printf"; -import { JavaLexer } from "../../../../generated/JavaLexer.js"; -import { graphemesLexer } from "../../../../generated/graphemesLexer.js"; +import { JavaLexer } from "../../../generated/JavaLexer.js"; +import { graphemesLexer } from "../../../generated/graphemesLexer.js"; // cspell: ignore udhr diff --git a/runtime-testsuite/src/tests/api/perf/emoji.txt b/runtime-testsuite/src/api/perf/emoji.txt similarity index 100% rename from runtime-testsuite/src/tests/api/perf/emoji.txt rename to runtime-testsuite/src/api/perf/emoji.txt diff --git a/runtime-testsuite/src/tests/api/perf/udhr_hin.txt b/runtime-testsuite/src/api/perf/udhr_hin.txt similarity index 100% rename from runtime-testsuite/src/tests/api/perf/udhr_hin.txt rename to runtime-testsuite/src/api/perf/udhr_hin.txt diff --git a/runtime-testsuite/src/tests/api/perf/udhr_kor.txt b/runtime-testsuite/src/api/perf/udhr_kor.txt similarity index 100% rename from runtime-testsuite/src/tests/api/perf/udhr_kor.txt rename to runtime-testsuite/src/api/perf/udhr_kor.txt diff --git a/runtime-testsuite/src/helpers/Character.ts b/runtime-testsuite/src/helpers/Character.ts index e426cfe..a83b8c7 100644 --- a/runtime-testsuite/src/helpers/Character.ts +++ 
b/runtime-testsuite/src/helpers/Character.ts @@ -417,6 +417,10 @@ export class Character { return isUpperCase(c); } + public static isISOControl(c: number): boolean { + return c <= 0x1F || (c >= 0x7F && c <= 0x9F); + } + /** * Converts the specified surrogate pair to its supplementary code point value. * diff --git a/runtime-testsuite/src/helpers/package_js.json b/runtime-testsuite/src/helpers/package_js.json deleted file mode 100644 index 1632c2c..0000000 --- a/runtime-testsuite/src/helpers/package_js.json +++ /dev/null @@ -1 +0,0 @@ -{"type": "module"} \ No newline at end of file diff --git a/runtime-testsuite/src/helpers/package_ts.json b/runtime-testsuite/src/helpers/package_ts.json deleted file mode 100644 index a2fbbbe..0000000 --- a/runtime-testsuite/src/helpers/package_ts.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "type": "module", - "devDependencies": { - "@types/node": "^18.0.5" - }, - "dependencies": { - "antlr4": "^4.13.1" - } -} diff --git a/runtime-testsuite/src/tests/TestInterpreterDataReader.ts b/runtime-testsuite/src/tests/TestInterpreterDataReader.ts deleted file mode 100644 index b50d3c0..0000000 --- a/runtime-testsuite/src/tests/TestInterpreterDataReader.ts +++ /dev/null @@ -1,57 +0,0 @@ -/* java2ts: keep */ - -/* - * Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import fs from "fs"; - -import { InterpreterDataReader } from "antlr4ng"; - -import { Test } from "../../utils/decorators.js"; -import { assertEquals, assertNull } from "../../utils/junit.js"; - -/** - * This file represents a simple sanity checks on the parsing of the .interp file - * available to the Java runtime for interpreting rather than compiling and executing parsers. 
- */ -export class TestInterpreterDataReader { - @Test - public testLexerFile(): void { - const content = fs.readFileSync("runtime-testsuite/test/generated/VisitorBasicLexer.interp", "utf-8"); - - const interpreterData = InterpreterDataReader.parseInterpreterData(content); - - assertEquals(6, interpreterData.vocabulary.getMaxTokenType()); - assertEquals(["A"], interpreterData.ruleNames); - assertEquals([null, "A"], interpreterData.vocabulary.getLiteralNames()); - assertEquals([null, "A"], interpreterData.vocabulary.getSymbolicNames()); - assertEquals(["DEFAULT_TOKEN_CHANNEL", "HIDDEN"], interpreterData.channels); - assertEquals(["DEFAULT_MODE"], interpreterData.modes); - - const states = interpreterData.atn.states.map((state) => { return state?.stateNumber; }); - assertEquals([4, 0, 1, 5, 6, -1, 2, 0, 7, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 4, 0, 1, 1, 0, 0, 0, 1, 3, 1, - 0, 0, 0, 3, 4, 5, 65, 0, 0, 4, 2, 1, 0, 0, 0, 1, 0, 0], states); - } - - @Test - public testParserFile(): void { - const content = fs.readFileSync("runtime-testsuite/test/generated/VisitorBasic.interp", "utf-8"); - - const interpreterData = InterpreterDataReader.parseInterpreterData(content); - - assertEquals(6, interpreterData.vocabulary.getMaxTokenType()); - assertEquals(["s"], interpreterData.ruleNames); - assertEquals([null, "A"], interpreterData.vocabulary.getLiteralNames()); - assertEquals([null, "A"], interpreterData.vocabulary.getSymbolicNames()); - assertNull(interpreterData.channels ?? null); - assertNull(interpreterData.modes ?? 
null); - - const states = interpreterData.atn.states.map((state) => { return state?.stateNumber; }); - assertEquals([4, 1, 1, 6, 2, 0, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, 0, 2, 1, 0, 0, 0, 2, 3, 5, 1, - 0, 0, 3, 4, 5, 0, 0, 1, 4, 1, 1, 0, 0, 0, 0], states); - } - -} diff --git a/runtime-testsuite/src/tests/TsNodeRunner.ts b/runtime-testsuite/src/tests/TsNodeRunner.ts deleted file mode 100644 index 99421be..0000000 --- a/runtime-testsuite/src/tests/TsNodeRunner.ts +++ /dev/null @@ -1,88 +0,0 @@ -/* java2ts: keep */ - -/* - * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { RuntimeTestUtils } from "../RuntimeTestUtils.js"; -import { RuntimeRunner } from "../RuntimeRunner.js"; -import { RunOptions } from "../RunOptions.js"; -import { FileUtils } from "../FileUtils.js"; -import { CompiledState } from "../states/CompiledState.js"; -import { GeneratedState } from "../states/GeneratedState.js"; - -export class TsNodeRunner extends RuntimeRunner { - - /* TypeScript runtime is the same as JavaScript runtime */ - private static readonly NORMALIZED_JAVASCRIPT_RUNTIME_PATH = this.getRuntimePath("JavaScript").replace("\\", "/"); - private static readonly NPM_EXEC = "npm" + (RuntimeTestUtils.isWindows() ? ".cmd" : ""); - - public override getLanguage(): string { - return "TypeScript"; - } - - public override getExtension(): string { - return "ts"; - } - - public override getBaseListenerSuffix(): string | null { return null; } - - public override getBaseVisitorSuffix(): string | null { return null; } - - public override getRuntimeToolName(): string { - return "npx" + (RuntimeTestUtils.isWindows() ? 
".cmd" : ""); - } - - protected override initRuntime(runOptions: RunOptions): void { - this.npmInstallTsNodeAndWebpack(); - this.npmLinkRuntime(); - } - - protected override getExecFileName(): string { return this.getTestFileName() + ".ts"; } - - protected override getExtraRunArgs(): string[] { return ["tsx"]; } - - protected override compile(runOptions: RunOptions, generatedState: GeneratedState): CompiledState { - - try { - FileUtils.writeFile(this.getTempDirPath(), "package.json", - RuntimeTestUtils.getTextFromResource("org/antlr/v4/test/runtime/helpers/package_ts.json")); - - FileUtils.writeFile(this.getTempDirPath(), "tsconfig.json", - RuntimeTestUtils.getTextFromResource("org/antlr/v4/test/runtime/helpers/tsconfig.json")); - - this.npmInstall(); - - this.npmLinkAntlr4(); - - return new CompiledState(generatedState, null); - - } catch (e) { - if (e instanceof Error) { - return new CompiledState(generatedState, e); - } else { - throw e; - } - } - - } - - private npmInstallTsNodeAndWebpack(): void { - // - } - - private npmLinkRuntime(): void { - // - } - - private npmInstall(): void { - //run([TsNodeRunner.NPM_EXEC, "--silent", "install"], this.getTempDirPath()); - } - - private npmLinkAntlr4(): void { - // run([TsNodeRunner.NPM_EXEC, "--silent", "link", "antlr4"], this.getTempDirPath()); - } - -} diff --git a/runtime-testsuite/src/tests/TypeScriptRuntimeTests.ts b/runtime-testsuite/src/tests/TypeScriptRuntimeTests.ts deleted file mode 100644 index f291c45..0000000 --- a/runtime-testsuite/src/tests/TypeScriptRuntimeTests.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* java2ts: keep */ - -/* - * Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. 
- */ - -import { TsNodeRunner } from "./TsNodeRunner.js"; -import { RuntimeRunner } from "../RuntimeRunner.js"; -import { RuntimeTests } from "../RuntimeTests.js"; - -export class TypeScriptRuntimeTests extends RuntimeTests { - protected override createRuntimeRunner(): RuntimeRunner { - return new TsNodeRunner(); - } -} diff --git a/tool/src/org/antlr/v4/automata/ATNPrinter.ts b/tool/src/org/antlr/v4/automata/ATNPrinter.ts new file mode 100644 index 0000000..2f10b3d --- /dev/null +++ b/tool/src/org/antlr/v4/automata/ATNPrinter.ts @@ -0,0 +1,136 @@ +/* java2ts: keep */ + +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/* eslint-disable jsdoc/require-param, jsdoc/require-returns */ + +import { + ATNState, HashSet, RuleStopState, RuleTransition, EpsilonTransition, ActionTransition, SetTransition, + NotSetTransition, AtomTransition, StarBlockStartState, PlusBlockStartState, BlockStartState, BlockEndState, + RuleStartState, PlusLoopbackState, StarLoopbackState, StarLoopEntryState, ATN, Vocabulary, +} from "antlr4ng"; +import { getTokenDisplayName } from "../misc/helpers.js"; + +/** An ATN walker that knows how to dump them to serialized strings. 
*/ +export class ATNPrinter { + private work: ATNState[]; + private marked: HashSet; + private atn: ATN; + private start: ATNState; + private ruleNames: string[]; + private vocabulary: Vocabulary; + + public constructor(atn: ATN, start: ATNState, ruleNames: string[], vocabulary: Vocabulary) { + this.atn = atn; + this.start = start; + this.ruleNames = ruleNames; + this.vocabulary = vocabulary; + } + + public asString(): string { + this.marked = new HashSet(); + + this.work = []; + this.work.push(this.start); + + let buffer = ""; + while (this.work.length > 0) { + const s = this.work.shift(); + if (!s || this.marked.has(s)) { + continue; + } + + this.marked.add(s); + for (const t of s.transitions) { + if (!(s instanceof RuleStopState)) { // don't add follow states to work + if (t instanceof RuleTransition) { + this.work.push((t).followState); + } else { + this.work.push(t.target); + } + + } + buffer += this.getStateString(s); + if (t instanceof EpsilonTransition) { + buffer += "->" + this.getStateString(t.target) + "\n"; + } else { + if (t instanceof RuleTransition) { + buffer += "-" + this.ruleNames[t.ruleIndex] + "->" + this.getStateString(t.target) + "\n"; + } else { + if (t instanceof ActionTransition) { + const a = t; + buffer += "-" + a.toString() + "->" + this.getStateString(t.target) + "\n"; + } else { + if (t instanceof SetTransition) { + const not = t instanceof NotSetTransition; + if (this.atn.grammarType === 0) { // ATNType.LEXER + buffer += "-" + (not ? "~" : "") + t.toString() + "->" + + this.getStateString(t.target) + "\n"; + } else { + buffer += "-" + (not ? 
"~" : "") + t.label.toString(this.vocabulary) + + "->" + this.getStateString(t.target) + "\n"; + } + } else { + if (t instanceof AtomTransition) { + const label = getTokenDisplayName(t.labelValue, this.vocabulary, + this.atn.grammarType === 0); + buffer += "-" + label + "->" + this.getStateString(t.target) + "\n"; + } else { + buffer += "-" + t.toString() + "->" + this.getStateString(t.target) + "\n"; + } + } + } + } + } + } + } + + return buffer; + } + + private getStateString(s: ATNState): string { + const n = s.stateNumber; + let stateStr = "s" + n; + if (s instanceof StarBlockStartState) { + stateStr = "StarBlockStart_" + n; + } else { + if (s instanceof PlusBlockStartState) { + stateStr = "PlusBlockStart_" + n; + } else { + if (s instanceof BlockStartState) { + stateStr = "BlockStart_" + n; + } else { + if (s instanceof BlockEndState) { + stateStr = "BlockEnd_" + n; + } else { + if (s instanceof RuleStartState) { + stateStr = "RuleStart_" + this.ruleNames[s.ruleIndex] + "_" + n; + } else { + if (s instanceof RuleStopState) { + stateStr = "RuleStop_" + this.ruleNames[s.ruleIndex] + "_" + n; + } else { + if (s instanceof PlusLoopbackState) { + stateStr = "PlusLoopBack_" + n; + } else { + if (s instanceof StarLoopbackState) { + stateStr = "StarLoopBack_" + n; + } else { + if (s instanceof StarLoopEntryState) { + stateStr = "StarLoopEntry_" + n; + } + } + } + } + } + } + } + } + } + + return stateStr; + } +} diff --git a/tool/src/org/antlr/v4/misc/CharSupport.ts b/tool/src/org/antlr/v4/misc/CharSupport.ts new file mode 100644 index 0000000..fd5b11f --- /dev/null +++ b/tool/src/org/antlr/v4/misc/CharSupport.ts @@ -0,0 +1,251 @@ +/* java2ts: keep */ + +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +/* eslint-disable jsdoc/require-returns, jsdoc/require-param */ + +import { IntervalSet, Lexer } from "antlr4ng"; +import { printf } from "fast-printf"; + +import { Character } from "../../../../../../runtime-testsuite/src/helpers/Character.js"; + +export class CharSupport { + /** + * When converting ANTLR char and string literals, here is the + * value set of escape chars. + */ + public static readonly ANTLRLiteralEscapedCharValue: Record<string, number> = {}; + + /** + * Given a char, we need to be able to show it as an ANTLR literal (sparse, indexed by code point). + */ + public static readonly ANTLRLiteralCharValueEscape: string[] = []; + + /** + * Return a string representing the escaped char for code c. E.g., If c + * has value 0x100, you will get "\\u0100". ASCII gets the usual + * char (non-hex) representation. Non-ASCII characters are spit out + * as \\uXXXX or \\u{XXXXXX} escapes. + */ + public static getANTLRCharLiteralForChar(c: number): string { + let result: string; + if (c < Lexer.MIN_CHAR_VALUE) { + result = ""; + } else { + const charValueEscape: string | undefined = c < CharSupport.ANTLRLiteralCharValueEscape.length + ? CharSupport.ANTLRLiteralCharValueEscape[c] : undefined; // sparse table: holes are undefined + if (charValueEscape !== undefined) { + result = charValueEscape; + } else { + if (Character.UnicodeBlock.of(c) === Character.UnicodeBlock.BASIC_LATIN && + !Character.isISOControl(c)) { + if (c === 0x5C) { // escape \ itself + result = "\\\\"; + } else { + if (c === 0x27) { // escape single quote + result = "\\'"; + } else { + result = Character.toString(c); + } + } + } else { + if (c <= 0xFFFF) { + result = printf("\\u%04X", c); + } else { + result = printf("\\u{%06X}", c); + } + } + + } + + } + + return "'" + result + "'"; + } + + /** + * Given a literal like (the 3 char sequence with single quotes) 'a', + * return the int value of 'a'. Convert escape sequences here also. + * Return -1 if not single char. 
+ */ + public static getCharValueFromGrammarCharLiteral(literal: string): number { + if (literal.length < 3) { + return -1; + } + + return CharSupport.getCharValueFromCharInGrammarLiteral(literal.substring(1, literal.length - 1)); + } + + public static getStringFromGrammarStringLiteral(literal: string): string | null { + let buffer = ""; + let i = 1; // skip first quote + const n = literal.length - 1; // skip last quote + while (i < n) { // scan all but last quote + let end = i + 1; + if (literal.charAt(i) === "\\") { + end = i + 2; + if (i + 1 < n && literal.charAt(i + 1) === "u") { + if (i + 2 < n && literal.charAt(i + 2) === "{") { // extended escape sequence + end = i + 3; + while (true) { + if (end + 1 > n) { + return null; + } + // invalid escape sequence. + const charAt = literal.charAt(end++); + if (charAt === "}") { + break; + } + + if (!Character.isDigit(charAt.codePointAt(0)!) && !(charAt >= "a" && charAt <= "f") + && !(charAt >= "A" && charAt <= "F")) { + return null; // invalid escape sequence. + } + } + } + else { + for (end = i + 2; end < i + 6; end++) { + if (end > n) { + return null; + } + // invalid escape sequence. + const charAt = literal.charAt(end); + if (!Character.isDigit(charAt.codePointAt(0)!) && !(charAt >= "a" && charAt <= "f") + && !(charAt >= "A" && charAt <= "F")) { + return null; // invalid escape sequence. + } + } + } + } + } + + if (end > n) { + return null; // invalid escape sequence. + } + + const esc = literal.substring(i, end); + const c = CharSupport.getCharValueFromCharInGrammarLiteral(esc); + if (c === -1) { + return null; // invalid escape sequence. + } else { + buffer += String.fromCodePoint(c); + } + + i = end; + } + + return buffer; + } + + /** + * Given char x or \\t or \\u1234 return the char value; + * Unnecessary escapes like '\{' yield -1. 
+ */ + public static getCharValueFromCharInGrammarLiteral(cstr: string): number { + switch (cstr.length) { + case 1: { // 'x', no escape char + return cstr.codePointAt(0)!; + } + + // two chars: only valid as a backslash escape + case 2: { + if (cstr.charAt(0) !== "\\") { + return -1; + } + + // '\x' (antlr lexer will catch invalid char) + const escChar = cstr.charAt(1); + if (escChar === "'") { // escape quote only in string literals. + return 0x27; + } + + // The escape table is filled with the escape character itself as key (n, r, t, ...). + const charVal = CharSupport.ANTLRLiteralEscapedCharValue[escChar] as number | undefined; + if (charVal === undefined) { // unknown or unnecessary escape + return -1; + } + + return charVal; + } + + case 6: { + // '\\u1234' or '\\u{12}' + if (!cstr.startsWith("\\u")) { + return -1; + } + + let startOff: number; + let endOff: number; + if (cstr.charAt(2) === "{") { + startOff = 3; + endOff = cstr.indexOf("}"); + } else { + startOff = 2; + endOff = cstr.length; + } + + return CharSupport.parseHexValue(cstr, startOff, endOff); + } + + default: { + if (cstr.startsWith("\\u{")) { + return CharSupport.parseHexValue(cstr, 3, cstr.indexOf("}")); + } + + return -1; + } + + } + } + + public static parseHexValue(cstr: string, startOff: number, endOff: number): number { + if (startOff < 0 || endOff < 0) { + return -1; + } + + const unicodeChars = cstr.substring(startOff, endOff); + const result = /^[0-9a-fA-F]+$/.test(unicodeChars) ? parseInt(unicodeChars, 16) : -1; // -1 if not pure hex + + return result; + } + + public static capitalize(s: string): string { + return Character.toUpperCase(s.charAt(0)) + s.substring(1); + } + + public static getIntervalSetEscapedString(intervalSet: IntervalSet): string { + const parts: string[] = []; + for (const interval of intervalSet) { + parts.push(CharSupport.getRangeEscapedString(interval.start, interval.stop)); + } + + return parts.join(" | "); + } + + public static getRangeEscapedString(codePointStart: number, codePointEnd: number): string { + return codePointStart !== codePointEnd + ? CharSupport.getANTLRCharLiteralForChar(codePointStart) + ".." 
+ + CharSupport.getANTLRCharLiteralForChar(codePointEnd) + : CharSupport.getANTLRCharLiteralForChar(codePointStart); + } + + static { + CharSupport.ANTLRLiteralEscapedCharValue.n = 0x0A; // '\n' + CharSupport.ANTLRLiteralEscapedCharValue.r = 0x0D; // '\r' + CharSupport.ANTLRLiteralEscapedCharValue.t = 0x09; // '\t' + CharSupport.ANTLRLiteralEscapedCharValue.b = 0x08; // '\b' + CharSupport.ANTLRLiteralEscapedCharValue.f = 0x0C; // '\f' + CharSupport.ANTLRLiteralEscapedCharValue["\\"] = 0x5C; // '\\' + CharSupport.ANTLRLiteralEscapedCharValue[0x5C] = 0x5C; + CharSupport.ANTLRLiteralCharValueEscape[0x0A] = "\\n"; + CharSupport.ANTLRLiteralCharValueEscape[0x0D] = "\\r"; + CharSupport.ANTLRLiteralCharValueEscape[0x09] = "\\t"; + CharSupport.ANTLRLiteralCharValueEscape[0x08] = "\\b"; + CharSupport.ANTLRLiteralCharValueEscape[0x0C] = "\\f"; + CharSupport.ANTLRLiteralCharValueEscape[0x5C] = "\\\\"; + } +} diff --git a/tool/src/org/antlr/v4/misc/helpers.ts b/tool/src/org/antlr/v4/misc/helpers.ts new file mode 100644 index 0000000..7540ef5 --- /dev/null +++ b/tool/src/org/antlr/v4/misc/helpers.ts @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/* eslint-disable jsdoc/require-returns , jsdoc/require-param */ + +import { Lexer, Token, Vocabulary } from "antlr4ng"; +import { CharSupport } from "./CharSupport.js"; + +export const INVALID_TOKEN_NAME = ""; + +/** + * Given a token type, get a meaningful name for it such as the ID + * or string literal. If this is a lexer and the ttype is in the + * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal. + */ +export const getTokenDisplayName = (ttype: number, vocabulary: Vocabulary, isLexer: boolean): string => { + // inside any target's char range and is lexer grammar? 
+ if (isLexer && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE) { + return CharSupport.getANTLRCharLiteralForChar(ttype); + } + + if (ttype === Token.EOF) { + return "EOF"; + } + + if (ttype === Token.INVALID_TYPE) { + return INVALID_TOKEN_NAME; + } + + const result = vocabulary.getDisplayName(ttype); + if (result !== null) { + return result; + } + + return String(ttype); +};