Skip to content

Commit

Permalink
Refactor the Parser to allow implementing a simple LexerLess parser
Browse files Browse the repository at this point in the history
by using method overrides.

A more proper solution with dependency injection proved too damaging to performance...

Part of #521
Fixes #528
  • Loading branch information
bd82 authored and Shahar Soel committed Jul 24, 2017
1 parent 00f818a commit a8fc00c
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/parse/grammar/lookahead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import {
IAnyOrAlt,
TokenMatcher,
TokenInstanceIdentityFunc,
TokenClassIdentityFunc
TokenClassIdentityFunc,
lookAheadSequence
} from "../parser_public"
import { TokenConstructor } from "../../scan/lexer_public"

Expand Down Expand Up @@ -116,7 +117,6 @@ export function buildLookaheadFuncForOptionalProd(
}

export type Alternative = TokenConstructor[][]
export type lookAheadSequence = TokenConstructor[][]

export function buildAlternativesLookAheadFunc(
alts: lookAheadSequence[],
Expand Down
9 changes: 5 additions & 4 deletions src/parse/parser_public.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ import {
buildSingleAlternativeLookaheadFunction,
getLookaheadPathsForOptionalProd,
getLookaheadPathsForOr,
lookAheadSequence,
PROD_TYPE
} from "./grammar/lookahead"
import {
Expand Down Expand Up @@ -144,6 +143,8 @@ export type TokenMatcher = (
export type TokenInstanceIdentityFunc = (tok: IToken) => string
export type TokenClassIdentityFunc = (tok: TokenConstructor) => string

export type lookAheadSequence = TokenConstructor[][]

export interface IParserConfig {
/**
* Is the error recovery / fault tolerance of the Chevrotain Parser enabled.
Expand Down Expand Up @@ -2990,12 +2991,12 @@ export class Parser {
)
}

private getLookaheadFuncFor<T>(
private getLookaheadFuncFor(
key: number,
occurrence: number,
maxLookahead: number,
prodType
): () => T {
): () => boolean {
let laFunc = <any>this.classLAFuncs.get(key)
if (laFunc === undefined) {
let ruleName = this.getCurrRuleFullName()
Expand Down Expand Up @@ -3251,7 +3252,7 @@ export class Parser {
tokenClassIdentityFunc: TokenClassIdentityFunc,
tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
dynamicTokensEnabled: boolean
): (orAlts?: IAnyOrAlt<any>[]) => number {
): (orAlts?: IAnyOrAlt<any>[]) => number | undefined {
return buildAlternativesLookAheadFunc(
alts,
hasPredicates,
Expand Down
96 changes: 94 additions & 2 deletions test/full_flow/ecma_quirks/ecma_quirks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,17 @@

import { createToken, IToken, Token } from "../../../src/scan/tokens_public"
import { Lexer, TokenConstructor } from "../../../src/scan/lexer_public"
import { END_OF_FILE, Parser } from "../../../src/parse/parser_public"
import {
END_OF_FILE,
IAnyOrAlt,
lookAheadSequence,
Parser,
TokenClassIdentityFunc,
TokenInstanceIdentityFunc,
TokenMatcher
} from "../../../src/parse/parser_public"
import { exceptions } from "../../../src/parse/exceptions_public"
import { every, flatten, map } from "../../../src/utils/utils"

const Return = createToken({
name: "Return",
Expand Down Expand Up @@ -57,7 +66,9 @@ class EcmaScriptQuirksParser extends Parser {

// Grammar rule:
//   statement
//      : Return (expression)? Semicolon
public statement = this.RULE("statement", () => {
this.CONSUME(Return)
// this.SUBRULE(this.expression)
// The expression is wrapped in an OPTION, so a bare `return ;` is accepted as well.
this.OPTION(() => {
this.SUBRULE(this.expression)
})
this.CONSUME(Semicolon)
})

Expand Down Expand Up @@ -161,6 +172,87 @@ class EcmaScriptQuirksParser extends Parser {
/**
 * Restores a previously saved lexer state by overwriting the current text index.
 *
 * Used by the lookahead functions of this parser to rewind after peeking at
 * the next token (they pair it with `exportLexerState`).
 *
 * @param newState - the text index to continue lexing from.
 */
protected importLexerState(newState: number) {
this.textIdx = newState
}

/**
 * Builds the lookahead function used for optional productions in this
 * scannerLess parser.
 *
 * Only LL(1) lookahead is supported: every path in `alt` must consist of
 * exactly one token type, otherwise an Error is thrown while building.
 *
 * The matcher / identity / dynamic-tokens arguments belong to the builder
 * signature but are not used by this implementation.
 *
 * @param alt - the possible lookahead paths of the optional production.
 * @returns a function (to be invoked with the parser instance as `this`)
 *          which reports whether any of the expected token types is next.
 */
protected lookAheadBuilderForOptional(
    alt: lookAheadSequence,
    tokenMatcher: TokenMatcher,
    tokenClassIdentityFunc: TokenClassIdentityFunc,
    tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
    dynamicTokensEnabled: boolean
): () => boolean {
    const isLL1 = every(alt, path => path.length === 1)
    if (!isLL1) {
        throw Error("This scannerLess parser only supports LL(1) lookahead.")
    }

    const candidateTypes = flatten(alt)

    // A plain `function` (not an arrow) on purpose: `this` must be whatever
    // the caller binds when invoking the lookahead function.
    return function() {
        // Peeking moves the text index forward, so remember where we started
        // and rewind afterwards; otherwise the parser could not consume the
        // very token we have just looked ahead for.
        const savedState = this.exportLexerState()
        let matched = false
        try {
            let idx = 0
            while (!matched && idx < candidateTypes.length) {
                matched = this.IS_NEXT_TOKEN(candidateTypes[idx]) !== false
                idx++
            }
        } finally {
            // This scannerLess parser is neither smart nor efficient: the token
            // seen during lookahead is not remembered, so it gets lexed twice,
            // once during lookahead and once more during consumption.
            this.importLexerState(savedState)
        }
        return matched
    }
}

/**
 * Builds the lookahead function used for alternations (OR) in this
 * scannerLess parser.
 *
 * Only LL(1) lookahead is supported: every path of every alternative must
 * consist of exactly one token type, otherwise an Error is thrown while
 * building.
 *
 * `hasPredicates` and the matcher / identity / dynamic-tokens arguments
 * belong to the builder signature but are not used by this implementation.
 *
 * @param alts - the lookahead paths, one entry per alternative.
 * @returns a function (to be invoked with the parser instance as `this`)
 *          yielding the index of the first alternative one of whose token
 *          types is next, or `undefined` when no alternative matches.
 */
protected lookAheadBuilderForAlternatives(
    alts: lookAheadSequence[],
    hasPredicates: boolean,
    tokenMatcher: TokenMatcher,
    tokenClassIdentityFunc: TokenClassIdentityFunc,
    tokenInstanceIdentityFunc: TokenInstanceIdentityFunc,
    dynamicTokensEnabled: boolean
): (orAlts?: IAnyOrAlt<any>[]) => number | undefined {
    const hasOnlySingleTokenPaths = (currAlt: lookAheadSequence) =>
        every(currAlt, path => path.length === 1)

    if (!every(alts, hasOnlySingleTokenPaths)) {
        throw Error("This scannerLess parser only supports LL(1) lookahead.")
    }

    const typesByAlt = map(alts, flatten)

    // A plain `function` (not an arrow) on purpose: `this` must be whatever
    // the caller binds when invoking the lookahead function.
    return function() {
        // Peeking moves the text index forward, so remember where we started
        // and rewind afterwards; otherwise the parser could not consume the
        // very token we have just looked ahead for.
        const savedState = this.exportLexerState()
        try {
            let altIdx = 0
            while (altIdx < typesByAlt.length) {
                const altTypes = typesByAlt[altIdx]
                let typeIdx = 0
                while (typeIdx < altTypes.length) {
                    if (this.IS_NEXT_TOKEN(altTypes[typeIdx]) !== false) {
                        return altIdx
                    }
                    typeIdx++
                }
                altIdx++
            }
            return undefined
        } finally {
            // This scannerLess parser is neither smart nor efficient: the token
            // seen during lookahead is not remembered, so it gets lexed twice,
            // once during lookahead and once more during consumption.
            this.importLexerState(savedState)
        }
    }
}
}

// reuse the same parser instance.
Expand Down
22 changes: 21 additions & 1 deletion test/full_flow/ecma_quirks/ecma_quirks_spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import { parse } from "./ecma_quirks"

// End-to-end checks for the scannerLess ECMAScript quirks example: every case
// feeds a `return` statement to the parser and expects no parsing errors.
//
// NOTE: this must remain a plain `describe`. A committed `describe.only` makes
// mocha run this suite exclusively and silently skip all other suites.
describe("ECMAScript Quirks Example (ScannerLess Mode)", () => {
    it("can parse a valid text successfully", () => {
        const result = parse("return ;")
        expect(result.errors).to.be.empty
    })

    it("can parse a valid text successfully #2", () => {
        const result = parse("return 1;")
        expect(result.errors).to.be.empty
    })

    // A "/" following an operand is a division operator...
    it("can parse a valid text successfully #3 - Division", () => {
        const result = parse("return 8 / 2 ;")
        expect(result.errors).to.be.empty
    })

    // ...while a "/" at the start of an expression opens a RegExp literal.
    it("can parse a valid text successfully #3 - RegExp", () => {
        const result = parse("return /123/ ;")
        expect(result.errors).to.be.empty
    })

    // Both meanings of "/" in a single statement.
    it("can parse a valid text successfully #3 - RegExp and Division", () => {
        const result = parse("return /123/ / 5 ;")
        expect(result.errors).to.be.empty
    })
})

0 comments on commit a8fc00c

Please sign in to comment.