Refactor the Parser to allow implementing a simple LexerLess parser

by using method overrides. A more proper solution with dependency injection proved too damaging to performance... Part of #521 Fixes #528
Chevrotain · Jul 24, 2017 · a8fc00c · a8fc00c
1 parent 00f818a
commit a8fc00c
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 9 deletions.
diff --git a/src/parse/grammar/lookahead.ts b/src/parse/grammar/lookahead.ts
@@ -16,7 +16,8 @@ import {
  IAnyOrAlt,
  TokenMatcher,
  TokenInstanceIdentityFunc,
- TokenClassIdentityFunc
+ TokenClassIdentityFunc,
+ lookAheadSequence
 } from "../parser_public"
 import { TokenConstructor } from "../../scan/lexer_public"
 
@@ -116,7 +117,6 @@ export function buildLookaheadFuncForOptionalProd(
 }
 
 export type Alternative = TokenConstructor[][]
-export type lookAheadSequence = TokenConstructor[][]
 
 export function buildAlternativesLookAheadFunc(
  alts: lookAheadSequence[],

diff --git a/src/parse/parser_public.ts b/src/parse/parser_public.ts
@@ -53,7 +53,6 @@ import {
  buildSingleAlternativeLookaheadFunction,
  getLookaheadPathsForOptionalProd,
  getLookaheadPathsForOr,
- lookAheadSequence,
  PROD_TYPE
 } from "./grammar/lookahead"
 import {
@@ -144,6 +143,8 @@ export type TokenMatcher = (
 export type TokenInstanceIdentityFunc = (tok: IToken) => string
 export type TokenClassIdentityFunc = (tok: TokenConstructor) => string
 
+export type lookAheadSequence = TokenConstructor[][]
+
 export interface IParserConfig {
  /**
  * Is the error recovery / fault tolerance of the Chevrotain Parser enabled.
@@ -2990,12 +2991,12 @@ export class Parser {
  )
  }
 
- private getLookaheadFuncFor<T>(
+ private getLookaheadFuncFor(
  key: number,
  occurrence: number,
  maxLookahead: number,
  prodType
- ): () => T {
+ ): () => boolean {
  let laFunc = <any>this.classLAFuncs.get(key)
  if (laFunc === undefined) {
  let ruleName = this.getCurrRuleFullName()
@@ -3251,7 +3252,7 @@ export class Parser {
  tokenClassIdentityFunc: TokenClassIdentityFunc,
  tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
  dynamicTokensEnabled: boolean
- ): (orAlts?: IAnyOrAlt<any>[]) => number {
+ ): (orAlts?: IAnyOrAlt<any>[]) => number | undefined {
  return buildAlternativesLookAheadFunc(
  alts,
  hasPredicates,

diff --git a/test/full_flow/ecma_quirks/ecma_quirks.ts b/test/full_flow/ecma_quirks/ecma_quirks.ts
@@ -2,8 +2,17 @@
 
 import { createToken, IToken, Token } from "../../../src/scan/tokens_public"
 import { Lexer, TokenConstructor } from "../../../src/scan/lexer_public"
-import { END_OF_FILE, Parser } from "../../../src/parse/parser_public"
+import {
+ END_OF_FILE,
+ IAnyOrAlt,
+ lookAheadSequence,
+ Parser,
+ TokenClassIdentityFunc,
+ TokenInstanceIdentityFunc,
+ TokenMatcher
+} from "../../../src/parse/parser_public"
 import { exceptions } from "../../../src/parse/exceptions_public"
+import { every, flatten, map } from "../../../src/utils/utils"
 
 const Return = createToken({
  name: "Return",
@@ -57,7 +66,9 @@ class EcmaScriptQuirksParser extends Parser {
 
  public statement = this.RULE("statement", () => {
  this.CONSUME(Return)
- // this.SUBRULE(this.expression)
+ this.OPTION(() => {
+ this.SUBRULE(this.expression)
+ })
  this.CONSUME(Semicolon)
  })
 
@@ -161,6 +172,87 @@ class EcmaScriptQuirksParser extends Parser {
  protected importLexerState(newState: number) {
  this.textIdx = newState
  }
+
+ protected lookAheadBuilderForOptional(
+ alt: lookAheadSequence,
+ tokenMatcher: TokenMatcher,
+ tokenClassIdentityFunc: TokenClassIdentityFunc,
+ tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
+ dynamicTokensEnabled: boolean
+ ): () => boolean {
+ if (!every(alt, currAlt => currAlt.length === 1)) {
+ throw Error(
+ "This scannerLess parser only supports LL(1) lookahead."
+ )
+ }
+
+ const allTokenTypes = flatten(alt)
+
+ return function() {
+ // save & restore lexer state as otherwise the text index will move ahead
+ // and the parser will fail consuming the tokens we have looked ahead for.
+ let lexerState = this.exportLexerState()
+ try {
+ for (let i = 0; i < allTokenTypes.length; i++) {
+ const nextToken = this.IS_NEXT_TOKEN(allTokenTypes[i])
+ if (nextToken !== false) {
+ return true
+ }
+ }
+ return false
+ } finally {
+ // this scannerLess parser is not very smart and efficient
+ // because we do not remember the last token we saw during lookahead
+ // we will have to lex it twice, once during lookahead and once during consumption...
+ this.importLexerState(lexerState)
+ }
+ }
+ }
+
+ protected lookAheadBuilderForAlternatives(
+ alts: lookAheadSequence[],
+ hasPredicates: boolean,
+ tokenMatcher: TokenMatcher,
+ tokenClassIdentityFunc: TokenClassIdentityFunc,
+ tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
+ dynamicTokensEnabled: boolean
+ ): (orAlts?: IAnyOrAlt<any>[]) => number | undefined {
+ if (
+ !every(alts, currPath =>
+ every(currPath, currAlt => currAlt.length === 1)
+ )
+ ) {
+ throw Error(
+ "This scannerLess parser only supports LL(1) lookahead."
+ )
+ }
+
+ const allTokenTypesPerAlt = map(alts, flatten)
+
+ return function() {
+ // save & restore lexer state as otherwise the text index will move ahead
+ // and the parser will fail consuming the tokens we have looked ahead for.
+ let lexerState = this.exportLexerState()
+ try {
+ for (let i = 0; i < allTokenTypesPerAlt.length; i++) {
+ const currAltTypes = allTokenTypesPerAlt[i]
+
+ for (let j = 0; j < currAltTypes.length; j++) {
+ const nextToken = this.IS_NEXT_TOKEN(currAltTypes[j])
+ if (nextToken !== false) {
+ return i
+ }
+ }
+ }
+ return undefined
+ } finally {
+ // this scannerLess parser is not very smart and efficient
+ // because we do not remember the last token we saw during lookahead
+ // we will have to lex it twice, once during lookahead and once during consumption...
+ this.importLexerState(lexerState)
+ }
+ }
+ }
 }
 
 // reuse the same parser instance.

diff --git a/test/full_flow/ecma_quirks/ecma_quirks_spec.ts b/test/full_flow/ecma_quirks/ecma_quirks_spec.ts
@@ -1,8 +1,28 @@
 import { parse } from "./ecma_quirks"
 
-describe("ECMAScript Quirks Example (ScannerLess Mode)", () => {
+describe.only("ECMAScript Quirks Example (ScannerLess Mode)", () => {
  it("can parse a valid text successfully", () => {
  const result = parse("return ;")
  expect(result.errors).to.be.empty
  })
+
+ it("can parse a valid text successfully #2", () => {
+ const result = parse("return 1;")
+ expect(result.errors).to.be.empty
+ })
+
+ it("can parse a valid text successfully #3 - Division", () => {
+ const result = parse("return 8 / 2 ;")
+ expect(result.errors).to.be.empty
+ })
+
+ it("can parse a valid text successfully #3 - RegExp", () => {
+ const result = parse("return /123/ ;")
+ expect(result.errors).to.be.empty
+ })
+
+ it("can parse a valid text successfully #3 - RegExp and Division", () => {
+ const result = parse("return /123/ / 5 ;")
+ expect(result.errors).to.be.empty
+ })
 })