Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support pluggable lookahead strategy #1852

Merged
merged 3 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/chevrotain/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ export {
tokenName
} from "./scan/tokens_public"

// Lookahead

export { getLookaheadPaths } from "./parse/grammar/lookahead"

export { LLkLookaheadStrategy } from "./parse/grammar/llk_lookahead"

// Other Utilities

export { defaultParserErrorProvider } from "./parse/errors_public"
Expand Down
64 changes: 25 additions & 39 deletions packages/chevrotain/src/parse/grammar/checks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ import {
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule,
Terminal
} from "@chevrotain/gast"
import { GAstVisitor } from "@chevrotain/gast"
import {
ILookaheadStrategy,
IProduction,
IProductionWithOccurrence,
TokenType
TokenType,
Rule
} from "@chevrotain/types"
import {
IGrammarValidatorErrorMessageProvider,
Expand All @@ -56,45 +57,34 @@ import dropRight from "lodash/dropRight"
import compact from "lodash/compact"
import { tokenStructuredMatcher } from "../../scan/tokens"

/**
 * Runs the validation implemented by the supplied lookahead strategy and
 * converts its results into parser definition errors.
 *
 * @param options.lookaheadStrategy - strategy whose `validate` method is invoked.
 * @param options.rules - grammar rules forwarded to the strategy.
 * @param options.tokenTypes - token types forwarded to the strategy.
 * @param options.grammarName - grammar name forwarded to the strategy.
 * @returns one error per strategy result, each tagged with
 *   `ParserDefinitionErrorType.CUSTOM_LOOKAHEAD_VALIDATION`.
 */
export function validateLookahead(options: {
  lookaheadStrategy: ILookaheadStrategy
  rules: Rule[]
  tokenTypes: TokenType[]
  grammarName: string
}): IParserDefinitionError[] {
  const { lookaheadStrategy, rules, tokenTypes, grammarName } = options
  const strategyErrors = lookaheadStrategy.validate({
    rules,
    tokenTypes,
    grammarName
  })

  return map(strategyErrors, (strategyError) => {
    // `type` is written first so the strategy's own fields are spread on top of it.
    return {
      type: ParserDefinitionErrorType.CUSTOM_LOOKAHEAD_VALIDATION,
      ...strategyError
    }
  })
}

export function validateGrammar(
topLevels: Rule[],
globalMaxLookahead: number,
tokenTypes: TokenType[],
errMsgProvider: IGrammarValidatorErrorMessageProvider,
grammarName: string
): IParserDefinitionError[] {
const duplicateErrors = flatMap(topLevels, (currTopLevel) =>
validateDuplicateProductions(currTopLevel, errMsgProvider)
)
const leftRecursionErrors = flatMap(topLevels, (currTopRule) =>
validateNoLeftRecursion(currTopRule, currTopRule, errMsgProvider)
const duplicateErrors: IParserDefinitionError[] = flatMap(
topLevels,
(currTopLevel) => validateDuplicateProductions(currTopLevel, errMsgProvider)
)

let emptyAltErrors: IParserEmptyAlternativeDefinitionError[] = []
let ambiguousAltsErrors: IParserAmbiguousAlternativesDefinitionError[] = []
let emptyRepetitionErrors: IParserDefinitionError[] = []

// Left recursion could cause infinite loops in the following validations.
// It is safest to have the user fix the left recursion errors first and only then examine further issues.
if (isEmpty(leftRecursionErrors)) {
emptyAltErrors = flatMap(topLevels, (currTopRule) =>
validateEmptyOrAlternative(currTopRule, errMsgProvider)
)
ambiguousAltsErrors = flatMap(topLevels, (currTopRule) =>
validateAmbiguousAlternationAlternatives(
currTopRule,
globalMaxLookahead,
errMsgProvider
)
)

emptyRepetitionErrors = validateSomeNonEmptyLookaheadPath(
topLevels,
globalMaxLookahead,
errMsgProvider
)
}

const termsNamespaceConflictErrors = checkTerminalAndNoneTerminalsNameSpace(
topLevels,
tokenTypes,
Expand All @@ -114,11 +104,7 @@ export function validateGrammar(
)
)

return (duplicateErrors as IParserDefinitionError[]).concat(
emptyRepetitionErrors,
leftRecursionErrors,
emptyAltErrors,
ambiguousAltsErrors,
return duplicateErrors.concat(
termsNamespaceConflictErrors,
tooManyAltsErrors,
duplicateRulesError
Expand Down Expand Up @@ -292,7 +278,7 @@ export function validateNoLeftRecursion(
return []
} else {
const ruleName = topRule.name
const foundLeftRecursion = includes(<any>nextNonTerminals, topRule)
const foundLeftRecursion = includes(nextNonTerminals, topRule)
if (foundLeftRecursion) {
errors.push({
message: errMsgProvider.buildLeftRecursionError({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ export function resolveGrammar(

export function validateGrammar(options: {
rules: Rule[]
maxLookahead: number
tokenTypes: TokenType[]
grammarName: string
errMsgProvider: IGrammarValidatorErrorMessageProvider
Expand All @@ -45,7 +44,6 @@ export function validateGrammar(options: {

return orgValidateGrammar(
options.rules,
options.maxLookahead,
options.tokenTypes,
options.errMsgProvider,
options.grammarName
Expand Down
140 changes: 140 additions & 0 deletions packages/chevrotain/src/parse/grammar/llk_lookahead.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import {
ILookaheadStrategy,
ILookaheadValidationError,
IOrAlt,
Rule,
TokenType,
OptionalProductionType
} from "@chevrotain/types"
import flatMap from "lodash/flatMap"
import isEmpty from "lodash/isEmpty"
import { defaultGrammarValidatorErrorProvider } from "../errors_public"
import { DEFAULT_PARSER_CONFIG } from "../parser/parser"
import {
validateAmbiguousAlternationAlternatives,
validateEmptyOrAlternative,
validateNoLeftRecursion,
validateSomeNonEmptyLookaheadPath
} from "./checks"
import {
buildAlternativesLookAheadFunc,
buildLookaheadFuncForOptionalProd,
buildLookaheadFuncForOr,
buildSingleAlternativeLookaheadFunction,
getProdType
} from "./lookahead"
import { IParserDefinitionError } from "./types"

/**
 * Lookahead strategy that delegates to the existing LL(k) grammar checks and
 * lookahead function builders.
 */
export class LLkLookaheadStrategy implements ILookaheadStrategy {
  /** Lookahead depth passed to the ambiguity and empty-repetition checks in `validate`. */
  readonly maxLookahead: number

  /**
   * @param options - optional configuration; may be omitted entirely.
   * @param options.maxLookahead - lookahead depth; when absent (or `null`),
   *   `DEFAULT_PARSER_CONFIG.maxLookahead` is used.
   */
  constructor(options?: { maxLookahead?: number }) {
    // Optional chaining keeps `new LLkLookaheadStrategy()` from throwing on a missing options object.
    this.maxLookahead =
      options?.maxLookahead ?? DEFAULT_PARSER_CONFIG.maxLookahead
  }

  /**
   * Validates the grammar rules for this strategy.
   *
   * Left recursion is checked first. If any left recursion error is found,
   * only those errors are returned and the remaining checks are skipped,
   * because left recursion could cause infinite loops in those checks.
   *
   * @param options.rules - the grammar rules to validate.
   * @param options.tokenTypes - not read by this implementation.
   * @param options.grammarName - not read by this implementation.
   * @returns the left recursion errors when there are any; otherwise the
   *   empty-alternative, ambiguous-alternatives and empty-repetition errors,
   *   in that order.
   */
  validate(options: {
    rules: Rule[]
    tokenTypes: TokenType[]
    grammarName: string
  }): ILookaheadValidationError[] {
    const leftRecursionErrors = this.validateNoLeftRecursion(options.rules)
    if (!isEmpty(leftRecursionErrors)) {
      return leftRecursionErrors
    }

    const emptyAltErrors = this.validateEmptyOrAlternatives(options.rules)
    const ambiguousAltsErrors = this.validateAmbiguousAlternationAlternatives(
      options.rules,
      this.maxLookahead
    )
    const emptyRepetitionErrors = this.validateSomeNonEmptyLookaheadPath(
      options.rules,
      this.maxLookahead
    )

    return [
      ...emptyAltErrors,
      ...ambiguousAltsErrors,
      ...emptyRepetitionErrors
    ]
  }

  /**
   * Runs the left recursion check on every rule, using each rule as both the
   * top rule and the current rule, with the default error message provider.
   */
  validateNoLeftRecursion(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateNoLeftRecursion(
        currTopRule,
        currTopRule,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  /**
   * Runs the empty-alternative check on every rule with the default error
   * message provider.
   */
  validateEmptyOrAlternatives(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateEmptyOrAlternative(
        currTopRule,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  /**
   * Runs the ambiguous-alternatives check on every rule with the given
   * lookahead depth and the default error message provider.
   */
  validateAmbiguousAlternationAlternatives(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateAmbiguousAlternationAlternatives(
        currTopRule,
        maxLookahead,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  /**
   * Runs the "some non-empty lookahead path" check over all rules at once,
   * with the given lookahead depth and the default error message provider.
   */
  validateSomeNonEmptyLookaheadPath(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return validateSomeNonEmptyLookaheadPath(
      rules,
      maxLookahead,
      defaultGrammarValidatorErrorProvider
    )
  }

  /**
   * Builds the lookahead function for an alternation by delegating to
   * `buildLookaheadFuncForOr` with `buildAlternativesLookAheadFunc`.
   *
   * Note: the depth used is `options.maxLookahead`, not `this.maxLookahead`.
   */
  buildLookaheadForAlternation(options: {
    prodOccurrence: number
    rule: Rule
    maxLookahead: number
    hasPredicates: boolean
    dynamicTokensEnabled: boolean
  }): (orAlts?: IOrAlt<any>[] | undefined) => number | undefined {
    return buildLookaheadFuncForOr(
      options.prodOccurrence,
      options.rule,
      options.maxLookahead,
      options.hasPredicates,
      options.dynamicTokensEnabled,
      buildAlternativesLookAheadFunc
    )
  }

  /**
   * Builds the lookahead function for an optional production by delegating to
   * `buildLookaheadFuncForOptionalProd` with
   * `buildSingleAlternativeLookaheadFunction`.
   *
   * Note: the depth used is `options.maxLookahead`, not `this.maxLookahead`.
   */
  buildLookaheadForOptional(options: {
    prodOccurrence: number
    prodType: OptionalProductionType
    rule: Rule
    maxLookahead: number
    dynamicTokensEnabled: boolean
  }): () => boolean {
    return buildLookaheadFuncForOptionalProd(
      options.prodOccurrence,
      options.rule,
      options.maxLookahead,
      options.dynamicTokensEnabled,
      getProdType(options.prodType),
      buildSingleAlternativeLookaheadFunction
    )
  }
}
Loading