diff --git a/lexer_adapter.md b/lexer_adapter.md new file mode 100644 index 0000000000..38f4bf3b48 --- /dev/null +++ b/lexer_adapter.md @@ -0,0 +1,3 @@ +* parser.input method + - Maybe the parser should not be able to accept a tokenVector array in the constructor? + - the input should have an any (or a generic T argument?) type, not one specific to tokenVector \ No newline at end of file diff --git a/src/parse/parser_public.ts b/src/parse/parser_public.ts index 9c24f1f59b..881e3029a8 100644 --- a/src/parse/parser_public.ts +++ b/src/parse/parser_public.ts @@ -126,7 +126,16 @@ export type IgnoredRuleIssues = { [dslNameAndOccurrence: string]: boolean } export type IgnoredParserIssues = { [ruleName: string]: IgnoredRuleIssues } const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException" -const END_OF_FILE = createTokenInstance(EOF, "", NaN, NaN, NaN, NaN, NaN, NaN) +export const END_OF_FILE = createTokenInstance( + EOF, + "", + NaN, + NaN, + NaN, + NaN, + NaN, + NaN +) Object.freeze(END_OF_FILE) export type TokenMatcher = ( @@ -543,11 +552,10 @@ export class Parser { protected maxLookahead: number protected ignoredIssues: IgnoredParserIssues protected outputCst: boolean + + // adapters protected errorMessageProvider: IErrorMessageProvider - protected _input: IToken[] = [] - protected inputIdx = -1 - protected savedTokenIdx = -1 protected isBackTrackingStack = [] protected className: string protected RULE_STACK: string[] = [] @@ -572,6 +580,12 @@ export class Parser { private LAST_EXPLICIT_RULE_STACK: number[] = [] private selfAnalysisDone = false + // lexerState + private tokVector: IToken[] + private tokVectorLength + private currIdx: number = -1 + private savedLexerState: number + /** * Only used internally for storing productions as they are built for the first time. * The final productions should be accessed from the static cache. 
@@ -586,7 +600,7 @@ export class Parser { | IMultiModeLexerDefinition, config: IParserConfig = DEFAULT_PARSER_CONFIG ) { - this._input = input + this.input = input // configuration this.recoveryEnabled = has(config, "recoveryEnabled") @@ -717,15 +731,6 @@ export class Parser { this._errors = newErrors } - public set input(newInput: IToken[]) { - this.reset() - this._input = newInput - } - - public get input(): IToken[] { - return cloneArr(this._input) - } - /** * Resets the parser state, should be overridden for custom parsers which "carry" additional state. * When overriding, remember to also invoke the super implementation! @@ -735,7 +740,6 @@ export class Parser { this.isBackTrackingStack = [] this.errors = [] - this._input = [] this.RULE_STACK = [] this.LAST_EXPLICIT_RULE_STACK = [] this.CST_STACK = [] @@ -900,19 +904,6 @@ export class Parser { } } - // skips a token and returns the next token - protected SKIP_TOKEN(): IToken { - // example: assume 45 tokens in the input, if input index is 44 it means that NEXT_TOKEN will return - // input[45] which is the 46th item and no longer exists, - // so in this case the largest valid input index is 43 (input.length - 2 ) - if (this.inputIdx <= this._input.length - 2) { - this.consumeToken() - return this.LA(1) - } else { - return END_OF_FILE - } - } - // Parsing DSL /** * Convenience method equivalent to CONSUME1. @@ -1831,49 +1822,6 @@ export class Parser { } } - /** - * Convenience method equivalent to LA(1) - * It is no longer used directly in chevrotain due to - * performance considerations (avoid the need for inlining optimizations). - * - * But it is maintained for backward compatibility reasons. - * - * @deprecated - */ - protected NEXT_TOKEN(): IToken { - return this.LA(1) - } - - // Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers - // or lexers dependent on parser context. 
- protected LA(howMuch: number): IToken { - if (this._input.length <= this.inputIdx + howMuch) { - return END_OF_FILE - } else { - return this._input[this.inputIdx + howMuch] - } - } - - protected consumeToken() { - this.inputIdx++ - } - - protected saveLexerState() { - this.savedTokenIdx = this.inputIdx - } - - protected restoreLexerState() { - this.inputIdx = this.savedTokenIdx - } - - protected resetLexerState(): void { - this.inputIdx = -1 - } - - protected moveLexerStateToEnd(): void { - this.inputIdx = this.input.length - 1 - } - // other functionality private saveRecogState(): IParserState { // errors is a getter which will clone the errors array @@ -1881,7 +1829,7 @@ export class Parser { let savedRuleStack = cloneArr(this.RULE_STACK) return { errors: savedErrors, - lexerState: this.inputIdx, + lexerState: this.exportLexerState(), RULE_STACK: savedRuleStack, CST_STACK: this.CST_STACK, LAST_EXPLICIT_RULE_STACK: this.LAST_EXPLICIT_RULE_STACK @@ -1890,7 +1838,7 @@ export class Parser { private reloadRecogState(newState: IParserState) { this.errors = newState.errors - this.inputIdx = newState.lexerState + this.importLexerState(newState.lexerState) this.RULE_STACK = newState.RULE_STACK } @@ -2004,7 +1952,7 @@ export class Parser { } } else if (isFirstInvokedRule) { // otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case - this.moveLexerStateToEnd() + this.moveToTerminatedState() // the parser should never throw one of its own errors outside its flow. 
// even if error recovery is disabled return recoveryValueFunc() @@ -3291,6 +3239,80 @@ export class Parser { ruleCstResult ) } + + // lexer-related methods + public set input(newInput: IToken[]) { + this.reset() + this.tokVector = newInput + this.tokVectorLength = newInput.length + } + + public get input(): IToken[] { + return this.tokVector + } + + // skips a token and returns the next token + protected SKIP_TOKEN(): IToken { + if (this.currIdx <= this.tokVector.length - 2) { + this.consumeToken() + return this.LA(1) + } else { + return END_OF_FILE + } + } + + /** + * Convenience method equivalent to LA(1) + * It is no longer used directly in chevrotain due to + * performance considerations (avoid the need for inlining optimizations). + * + * But it is maintained for backward compatibility reasons. + * + * @deprecated + */ + protected NEXT_TOKEN(): IToken { + return this.LA(1) + } + + // Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers + // or lexers dependent on parser context. + protected LA(howMuch: number): IToken { + // TODO: confirm that this optimization (caching tokVectorLength) has measurable benefits + if (this.tokVectorLength <= this.currIdx + howMuch) { + return END_OF_FILE + } else { + return this.tokVector[this.currIdx + howMuch] + } + } + + protected consumeToken() { + this.currIdx++ + } + + protected exportLexerState(): number { + return this.currIdx + } + + protected importLexerState(newState: number) { + this.currIdx = newState + } + + // TODO: should save/restore be renamed to export/import for consistency? + protected saveLexerState() { + this.savedLexerState = this.currIdx + } + + protected restoreLexerState() { + this.currIdx = this.savedLexerState + } + + protected resetLexerState(): void { + this.currIdx = -1 + } + + moveToTerminatedState(): void { + this.currIdx = this.tokVector.length - 1 + } } function InRuleRecoveryException(message: string) {