diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e1f9763..0635c7ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Released: TBD ### Minor Changes +- [#430](https://github.com/peggyjs/peggy/pull/430) Make generate-js.js ts clean - [#446](https://github.com/peggyjs/peggy/pull/446) Add a right-associative `ExponentiationExpression` rule (operator `**`) to `javascript.pegjs` example grammar. - [#427](https://github.com/peggyjs/peggy/pull/427) Avoid double extraction of substrings in various MATCH_ bytecodes diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 911faf74..ece86ac3 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -1,3 +1,4 @@ +// @ts-check "use strict"; const asts = require("../asts"); @@ -8,13 +9,17 @@ const { stringEscape, regexpClassEscape } = require("../utils"); const { SourceNode } = require("source-map-generator"); const GrammarLocation = require("../../grammar-location"); +/** + * @typedef {import("../../peg")} PEG + */ + /** * Converts source text from the grammar into the `source-map` object * * @param {string} code Multiline string with source code - * @param {import("../peg").Location} location + * @param {PEG.LocationRange} location * Location that represents code block in the grammar - * @param {string?} name Name of the code chunk + * @param {string} [name] Name of the code chunk * * @returns {SourceNode} New node that represents code chunk. * Code will be splitted by lines if necessary @@ -48,10 +53,10 @@ function toSourceNode(code, location, name) { * * @param {string} prefix String that will be prepended before mapped chunk * @param {string} chunk Chunk for mapping (possible multiline) - * @param {import("../../peg").Location} location + * @param {PEG.LocationRange} location * Location that represents chunk in the grammar * @param {string} suffix String that will be appended after mapped chunk - * @param {string?} name Name of the code chunk + * @param {string} [name] Name of the code chunk * * @returns {SourceNode} New node that represents code chunk. * Code will be splitted by lines if necessary @@ -80,9 +85,53 @@ function wrapInSourceNode(prefix, chunk, location, suffix, name) { return new SourceNode(null, null, null, [prefix, chunk, suffix]); } -// Generates parser JavaScript code. +/** + * @typedef {(string|SourceNode)[]} SourceArray + * + * @typedef {PEG.SourceBuildOptions} SourceBuildOptions + * @typedef {object} ExtraOptions + * @property {PEG.Dependencies} [dependencies] + * @property {string} [exportVar] + * @typedef {SourceBuildOptions & ExtraOptions} Options + */ +/** + * Generates parser JavaScript code. + * + * @param {PEG.ast.Grammar} ast + * @param {Options} options + */ function generateJS(ast, options) { - // These only indent non-empty lines to avoid trailing whitespace. + if (!ast.literals || !ast.locations || !ast.classes + || !ast.expectations || !ast.functions) { + throw new Error( + "generateJS: generate bytecode was not called." + ); + } + const { literals, locations, classes, expectations, functions } = ast; + if (!options.allowedStartRules) { + throw new Error( + "generateJS: options.allowedStartRules was not set." + ); + } + const { allowedStartRules } = options; + /** @type {PEG.Dependencies} */ + const dependencies = options.dependencies || {}; + + /** + * @overload + * @param {string} code + * @returns {string} + */ + /** + * @overload + * @param {SourceArray} code + * @returns {SourceArray} + */ + /** + * These only indent non-empty lines to avoid trailing whitespace. + * @param {SourceArray} code + * @returns {SourceArray} + */ function indent2(code) { /* * - raw lines (outside of SourceNodes) have implict newlines @@ -113,9 +162,33 @@ function generateJS(ast, options) { */ let sawEol = true; let inSourceNode = 0; + /** + * @overload + * @param {string | SourceNode} code + * @returns {string | SourceNode} + */ + /** + * @overload + * @param {SourceNode} code + * @returns {SourceNode} + */ + /** + * @overload + * @param {SourceNode[]} code + * @returns {SourceNode[]} + */ + /** + * @overload + * @param {SourceArray} code + * @returns {SourceArray} + */ + /** + * @param {SourceArray | string | SourceNode} code + * @returns {SourceArray | string | SourceNode} + */ function helper(code) { if (Array.isArray(code)) { - return code.map(helper); + return code.map(s => helper(s)); } if (code instanceof SourceNode) { inSourceNode++; @@ -139,20 +212,28 @@ function generateJS(ast, options) { } return helper(code); } - + /** @param {number} i */ function l(i) { return "peg$c" + i; } // |literals[i]| of the abstract machine + /** @param {number} i */ function r(i) { return "peg$r" + i; } // |classes[i]| of the abstract machine + /** @param {number} i */ function e(i) { return "peg$e" + i; } // |expectations[i]| of the abstract machine + /** @param {number} i */ function f(i) { return "peg$f" + i; } // |actions[i]| of the abstract machine - /** Generates name of the function that parses specified rule. */ + /** + * Generates name of the function that parses specified rule. + * @param {string} name + */ function name(name) { return "peg$parse" + name; } function generateTables() { + /** @param {string} literal */ function buildLiteral(literal) { return "\"" + stringEscape(literal) + "\""; } + /** @param {PEG.ast.GrammarCharacterClass} cls */ function buildRegexp(cls) { return "/^[" + (cls.inverted ? "^" : "") @@ -164,6 +245,7 @@ function generateJS(ast, options) { + "]/" + (cls.ignoreCase ? "i" : ""); } + /** @param {PEG.ast.GrammarExpectation} e */ function buildExpectation(e) { switch (e.type) { case "rule": { @@ -193,6 +275,10 @@ function generateJS(ast, options) { } } + /** + * @param {PEG.ast.FunctionConst} a + * @param {number} i + */ function buildFunc(a, i) { return wrapInSourceNode( `\n var ${f(i)} = function(${a.params.join(", ")}) {`, @@ -204,19 +290,24 @@ function generateJS(ast, options) { return new SourceNode( null, null, options.grammarSource, [ - ast.literals.map( + literals.map( (c, i) => " var " + l(i) + " = " + buildLiteral(c) + ";" - ).concat("", ast.classes.map( + ).concat("", classes.map( (c, i) => " var " + r(i) + " = " + buildRegexp(c) + ";" - )).concat("", ast.expectations.map( + )).concat("", expectations.map( (c, i) => " var " + e(i) + " = " + buildExpectation(c) + ";" )).concat("").join("\n"), - ast.functions.map(buildFunc), + ...functions.map(buildFunc), ] ); } + /** + * @param {string} ruleNameCode + * @param {number} ruleIndexCode + */ function generateRuleHeader(ruleNameCode, ruleIndexCode) { + /** @type {string[]} */ const parts = []; parts.push(""); @@ -272,7 +363,12 @@ function generateJS(ast, options) { return parts; } + /** + * @param {string} ruleNameCode + * @param {string} resultCode + */ function generateRuleFooter(ruleNameCode, resultCode) { + /** @type {string[]} */ const parts = []; if (options.cache) { @@ -310,34 +406,40 @@ function generateJS(ast, options) { return parts; } + /** @param {PEG.ast.Rule} rule */ function generateRuleFunction(rule) { + /** @type {SourceArray} */ const parts = []; - const stack = new Stack(rule.name, "s", "var", rule.bytecode); + const bytecode = /** @type {number[]} */(rule.bytecode); + const stack = new Stack(rule.name, "s", "var", bytecode); + /** @param {number[]} bc */ function compile(bc) { let ip = 0; const end = bc.length; const parts = []; let value = undefined; + /** + * @param {string} cond + * @param {number} argCount + * @param {((bc: number[])=>SourceArray) | null} [thenFn] + */ function compileCondition(cond, argCount, thenFn) { const baseLength = argCount + 3; const thenLength = bc[ip + baseLength - 2]; const elseLength = bc[ip + baseLength - 1]; - let thenCode = undefined; - let elseCode = undefined; - stack.checkedIf( + const [thenCode, elseCode] = stack.checkedIf( ip, () => { - ip += baseLength; - thenCode = (thenFn || compile)(bc.slice(ip, ip + thenLength)); - ip += thenLength; + ip += baseLength + thenLength; + return (thenFn || compile)(bc.slice(ip - thenLength, ip)); }, (elseLength > 0) ? () => { - elseCode = compile(bc.slice(ip, ip + elseLength)); ip += elseLength; + return compile(bc.slice(ip - elseLength, ip)); } : null ); @@ -351,7 +453,7 @@ function generateJS(ast, options) { parts.push("}"); } - /* + /** MATCH_* opcodes typically do something like if ((input.substr(peg$currPos, length))) { @@ -373,6 +475,9 @@ function generateJS(ast, options) { } and avoid extracting the sub string twice. + @param {(chunk:string, optimized:boolean)=>string} condFn + @param {number} argCount + @param {number} inputChunkLength */ function compileInputChunkCondition( condFn, argCount, inputChunkLength @@ -387,6 +492,7 @@ function generateJS(ast, options) { // Push the assignment to the next available variable. parts.push(stack.push(inputChunk)); inputChunk = stack.pop(); + /** @param {number[]} bc */ thenFn = bc => { // The bc[0] is an ACCEPT_N, and bc[1] is the N. We've already done // the assignment (before the if), so we just need to bump the @@ -404,15 +510,14 @@ function generateJS(ast, options) { compileCondition(condFn(inputChunk, thenFn !== null), argCount, thenFn); } + /** @param {string} cond */ function compileLoop(cond) { const baseLength = 2; const bodyLength = bc[ip + baseLength - 1]; - let bodyCode = undefined; - stack.checkedLoop(ip, () => { - ip += baseLength; - bodyCode = compile(bc.slice(ip, ip + bodyLength)); - ip += bodyLength; + const bodyCode = stack.checkedLoop(ip, () => { + ip += baseLength + bodyLength; + return compile(bc.slice(ip - bodyLength, ip)); }); parts.push("while (" + cond + ") {"); @@ -420,6 +525,7 @@ function generateJS(ast, options) { parts.push("}"); } + /** @param {number} baseLength */ function compileCall(baseLength) { const paramsLength = bc[ip + baseLength - 1]; @@ -492,7 +598,6 @@ function generateJS(ast, options) { case op.WRAP: // WRAP n parts.push( - // @ts-expect-error pop() returns array if argument is specified stack.push("[" + stack.pop(bc[ip + 1]).join(", ") + "]") ); ip += 2; @@ -559,7 +664,7 @@ function generateJS(ast, options) { case op.MATCH_STRING: { // MATCH_STRING s, a, f, ... const litNum = bc[ip + 1]; - const literal = ast.literals[litNum]; + const literal = literals[litNum]; compileInputChunkCondition( (inputChunk, optimized) => { if (literal.length > 1) { @@ -581,7 +686,7 @@ function generateJS(ast, options) { compileInputChunkCondition( inputChunk => `${inputChunk}.toLowerCase() === ${l(litNum)}`, 1, - ast.literals[litNum].length + literals[litNum].length ); break; } @@ -611,8 +716,8 @@ function generateJS(ast, options) { case op.ACCEPT_STRING: // ACCEPT_STRING s parts.push(stack.push(l(bc[ip + 1]))); parts.push( - ast.literals[bc[ip + 1]].length > 1 - ? "peg$currPos += " + ast.literals[bc[ip + 1]].length + ";" + literals[bc[ip + 1]].length > 1 + ? "peg$currPos += " + literals[bc[ip + 1]].length + ";" : "peg$currPos++;" ); ip += 2; @@ -659,7 +764,7 @@ function generateJS(ast, options) { case op.SOURCE_MAP_PUSH: stack.sourceMapPush( parts, - ast.locations[bc[ip + 1]] + locations[bc[ip + 1]] ); ip += 2; break; @@ -672,8 +777,8 @@ function generateJS(ast, options) { case op.SOURCE_MAP_LABEL_PUSH: stack.labels[bc[ip + 1]] = { - label: ast.literals[bc[ip + 2]], - location: ast.locations[bc[ip + 3]], + label: literals[bc[ip + 2]], + location: locations[bc[ip + 3]], }; ip += 4; break; @@ -685,14 +790,14 @@ function generateJS(ast, options) { // istanbul ignore next Because we never generate invalid bytecode we cannot reach this branch default: - throw new Error("Invalid opcode: " + bc[ip] + ".", { rule: rule.name, bytecode: bc }); + throw new Error("Invalid opcode: " + bc[ip] + "."); } } return parts; } - const code = compile(rule.bytecode); + const code = compile(bytecode); parts.push(wrapInSourceNode( "function ", @@ -723,6 +828,10 @@ function generateJS(ast, options) { return parts; } + /** + * @template {string} T + * @param {PEG.ast.CodeBlock} node + */ function ast2SourceNode(node) { // If location is not defined (for example, AST node was replaced // by a plugin and does not provide location information, see @@ -969,11 +1078,11 @@ function generateJS(ast, options) { } const startRuleFunctions = "{ " - + options.allowedStartRules.map( + + allowedStartRules.map( r => r + ": " + name(r) ).join(", ") + " }"; - const startRuleFunction = name(options.allowedStartRules[0]); + const startRuleFunction = name(allowedStartRules[0]); parts.push( "function peg$parse(input, options) {", @@ -1205,7 +1314,9 @@ function generateJS(ast, options) { ); } + /** @param {SourceNode} toplevelCode */ function generateWrapper(toplevelCode) { + /** @return {(string|SourceNode)[]} */ function generateGeneratedByComment() { return [ `// @generated by Peggy ${VERSION}.`, @@ -1246,7 +1357,7 @@ function generateJS(ast, options) { }, commonjs() { - const dependencyVars = Object.keys(options.dependencies); + const dependencyVars = Object.keys(dependencies); const parts = generateGeneratedByComment(); parts.push( @@ -1260,7 +1371,7 @@ function generateJS(ast, options) { parts.push( "var " + variable + " = require(\"" - + stringEscape(options.dependencies[variable]) + + stringEscape(dependencies[variable]) + "\");" ); }); @@ -1277,7 +1388,7 @@ function generateJS(ast, options) { }, es() { - const dependencyVars = Object.keys(options.dependencies); + const dependencyVars = Object.keys(dependencies); const parts = generateGeneratedByComment(); parts.push(""); @@ -1287,7 +1398,7 @@ function generateJS(ast, options) { parts.push( "import " + variable + " from \"" - + stringEscape(options.dependencies[variable]) + + stringEscape(dependencies[variable]) + "\";" ); }); @@ -1308,9 +1419,9 @@ function generateJS(ast, options) { }, amd() { - const dependencyVars = Object.keys(options.dependencies); - const dependencyIds = dependencyVars.map(v => options.dependencies[v]); - const dependencies = "[" + const dependencyVars = Object.keys(dependencies); + const dependencyIds = dependencyVars.map(v => dependencies[v]); + const deps = "[" + dependencyIds.map( id => "\"" + stringEscape(id) + "\"" ).join(", ") @@ -1319,7 +1430,7 @@ function generateJS(ast, options) { return [ ...generateGeneratedByComment(), - "define(" + dependencies + ", function(" + params + ") {", + "define(" + deps + ", function(" + params + ") {", " \"use strict\";", "", toplevelCode, @@ -1343,9 +1454,9 @@ function generateJS(ast, options) { }, umd() { - const dependencyVars = Object.keys(options.dependencies); - const dependencyIds = dependencyVars.map(v => options.dependencies[v]); - const dependencies = "[" + const dependencyVars = Object.keys(dependencies); + const dependencyIds = dependencyVars.map(v => dependencies[v]); + const deps = "[" + dependencyIds.map( id => "\"" + stringEscape(id) + "\"" ).join(", ") @@ -1359,7 +1470,7 @@ function generateJS(ast, options) { parts.push( "(function(root, factory) {", " if (typeof define === \"function\" && define.amd) {", - " define(" + dependencies + ", factory);", + " define(" + deps + ", factory);", " } else if (typeof module === \"object\" && module.exports) {", " module.exports = factory(" + requires + ");" ); @@ -1386,7 +1497,7 @@ function generateJS(ast, options) { }, }; - const parts = generators[options.format](); + const parts = generators[options.format || "bare"](); return new SourceNode( // eslint-disable-next-line function-call-argument-newline -- This expression has a better readability when on two lines diff --git a/lib/compiler/stack.js b/lib/compiler/stack.js index 6496c093..cedb6889 100644 --- a/lib/compiler/stack.js +++ b/lib/compiler/stack.js @@ -22,7 +22,10 @@ class Stack { this.ruleName = ruleName; this.type = type; this.bytecode = bytecode; - /* Map from stack index, to label targetting that index */ + /** + * Map from stack index, to label targetting that index + * @type {Record} + */ this.labels = {}; /* Stack of in-flight source mappings */ this.sourceMapStack = []; @@ -106,11 +109,22 @@ class Stack { return code.join(""); } + /** + * @overload + * @param {undefined} [n] + * @return {string} + */ + /** + * @overload + * @param {number} n + * @return {string[]} + */ /** * Returns name or `n` names of the variable(s) from the top of the stack. * - * @param {number} [n=1] Quantity of variables, which need to be removed from the stack - * @return {string|string[]} Generated name(s). If `n > 1` than array has length of `n` + * @param {number} [n] Quantity of variables, which need to be removed from the stack + * @returns {string[]|string} Generated name(s). If n is defined then it returns an + * array of length `n` * * @throws {RangeError} If the stack underflow (there are more `pop`s than `push`es) */ @@ -185,10 +199,11 @@ class Stack { /** * Checks that code in the `generateIf` and `generateElse` move the stack pointer in the same way. * + * @template T * @param {number} pos Opcode number for error messages - * @param {function()} generateIf First function that works with this stack - * @param {function()} [generateElse] Second function that works with this stack - * @return {undefined} + * @param {() => T} generateIf First function that works with this stack + * @param {(() => T)|null} [generateElse] Second function that works with this stack + * @return {T[]} * * @throws {Error} If `generateElse` is defined and the stack pointer moved differently in the * `generateIf` and `generateElse` @@ -196,38 +211,41 @@ class Stack { checkedIf(pos, generateIf, generateElse) { const baseSp = this.sp; - generateIf(); + const ifResult = generateIf(); - if (generateElse) { - const thenSp = this.sp; + if (!generateElse) { + return [ifResult]; + } + const thenSp = this.sp; - this.sp = baseSp; - generateElse(); + this.sp = baseSp; + const elseResult = generateElse(); - if (thenSp !== this.sp) { - throw new Error( - "Rule '" + this.ruleName + "', position " + pos + ": " - + "Branches of a condition can't move the stack pointer differently " - + "(before: " + baseSp + ", after then: " + thenSp + ", after else: " + this.sp + "). " - + "Bytecode: " + this.bytecode - ); - } + if (thenSp !== this.sp) { + throw new Error( + "Rule '" + this.ruleName + "', position " + pos + ": " + + "Branches of a condition can't move the stack pointer differently " + + "(before: " + baseSp + ", after then: " + thenSp + ", after else: " + this.sp + "). " + + "Bytecode: " + this.bytecode + ); } + return [ifResult, elseResult]; } /** * Checks that code in the `generateBody` do not move stack pointer. * + * @template T * @param {number} pos Opcode number for error messages - * @param {function()} generateBody Function that works with this stack - * @return {undefined} + * @param {() => T} generateBody Function that works with this stack + * @return {T} * * @throws {Error} If `generateBody` move the stack pointer (if it contains unbalanced `push`es and `pop`s) */ checkedLoop(pos, generateBody) { const baseSp = this.sp; - generateBody(); + const result = generateBody(); if (baseSp !== this.sp) { throw new Error( @@ -237,6 +255,7 @@ class Stack { + "Bytecode: " + this.bytecode ); } + return result; } sourceMapPush(parts, location) { diff --git a/lib/peg.d.ts b/lib/peg.d.ts index 0eb28220..7cacf822 100644 --- a/lib/peg.d.ts +++ b/lib/peg.d.ts @@ -51,6 +51,23 @@ declare namespace ast { location: LocationRange; } + /** + * Type of the classes field on a Grammar node. Not quite the same as + * CharacterClass (`parts` was renamed to `value`). + */ + interface GrammarCharacterClass { + value: (string[] | string)[]; + inverted: boolean; + ignoreCase: boolean; + } + + type GrammarExpectation = + | { type: "any" } + | { type: "literal"; value: string; ignoreCase: boolean } + | { type: "rule"; value: string } + | GrammarCharacterClass & { type: "class" } + ; + /** The main Peggy AST class returned by the parser. */ interface Grammar extends Node<"grammar"> { /** Initializer that run once when importing generated parser module. */ @@ -71,8 +88,8 @@ declare namespace ast { * bytecodes to refer back to via index. */ literals?: string[]; - classes?: CharacterClass[]; - expectations?: parser.Expectation[]; + classes?: GrammarCharacterClass[]; + expectations?: GrammarExpectation[]; functions?: FunctionConst[]; locations?: LocationRange[]; } diff --git a/test/types/peg.test-d.ts b/test/types/peg.test-d.ts index dc4e0829..ce1d4e0d 100644 --- a/test/types/peg.test-d.ts +++ b/test/types/peg.test-d.ts @@ -176,7 +176,6 @@ describe("peg.d.ts", () => { it("creates an AST", () => { const grammar = peggy.parser.parse(src); expectType(grammar); - const visited: { [typ: string]: number } = {}; function add(typ: string): void { if (!visited[typ]) { @@ -197,6 +196,17 @@ describe("peg.d.ts", () => { ); expectType(node.initializer); expectType(node.rules); + expectType(node.literals); + expectType(node.classes); + expectType( + node.expectations + ); + expectType( + node.functions + ); + expectType( + node.locations + ); if (node.topLevelInitializer) { visit(node.topLevelInitializer); diff --git a/test/unit/compiler/passes/generate-js.spec.js b/test/unit/compiler/passes/generate-js.spec.js new file mode 100644 index 00000000..7b13bd10 --- /dev/null +++ b/test/unit/compiler/passes/generate-js.spec.js @@ -0,0 +1,56 @@ +// @ts-check +"use strict"; + +const chai = require("chai"); +const pass = require("../../../../lib/compiler/passes/generate-js"); + +const { expect } = chai; +/** + * @typedef {import("../../../../lib/peg")} PEG + */ + +describe("compiler pass |generateJS|", () => { + describe("coverage", () => { + /** @type {PEG.ast.Grammar} */ + const ast = { + type: "grammar", + rules: [], + location: { + source: "", + start: { line:1, column:1, offset:0 }, + end: { line:1, column:1, offset:0 }, + }, + }; + const options + = /** @type {PEG.SourceBuildOptions} */({}); + it("throws unless various grammar fields are set", () => { + expect( + () => pass(ast, options) + ).to.throw(Error, "generateJS: generate bytecode was not called."); + ast.literals = []; + expect( + () => pass({ ...ast, literals:[] }, options) + ).to.throw(Error, "generateJS: generate bytecode was not called."); + ast.locations = []; + expect( + () => pass({ ...ast, literals:[] }, options) + ).to.throw(Error, "generateJS: generate bytecode was not called."); + ast.classes = []; + expect( + () => pass({ ...ast, literals:[] }, options) + ).to.throw(Error, "generateJS: generate bytecode was not called."); + ast.expectations = []; + expect( + () => pass({ ...ast, literals:[] }, options) + ).to.throw(Error, "generateJS: generate bytecode was not called."); + ast.functions = []; + expect( + () => pass(ast, options) + ).to.throw(Error, "generateJS: options.allowedStartRules was not set."); + options.allowedStartRules = ["start"]; + expect( + () => pass(ast, options) + ).to.not.throw(); + }); + }); +});