Skip to content

Commit

Permalink
WIP custom lexer adapters.
Browse files Browse the repository at this point in the history
fixes #528
  • Loading branch information
Shahar Soel authored and bd82 committed Jul 9, 2017
1 parent 64cc6ea commit 7ab67a3
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 74 deletions.
3 changes: 3 additions & 0 deletions lexer_adapter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* parser.input method
* Maybe the parser should not be able to accept a tokenVector array in the constructor?
* The input should be `any` (or a generic `T` argument?), not specific to a tokenVector.
170 changes: 96 additions & 74 deletions src/parse/parser_public.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,16 @@ export type IgnoredRuleIssues = { [dslNameAndOccurrence: string]: boolean }
export type IgnoredParserIssues = { [ruleName: string]: IgnoredRuleIssues }

const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException"
const END_OF_FILE = createTokenInstance(EOF, "", NaN, NaN, NaN, NaN, NaN, NaN)
// Sentinel token returned when looking ahead past the end of the token vector.
// Frozen so no consumer can accidentally mutate the shared singleton.
export const END_OF_FILE = createTokenInstance(EOF, "", NaN, NaN, NaN, NaN, NaN, NaN)
Object.freeze(END_OF_FILE)

export type TokenMatcher = (
Expand Down Expand Up @@ -543,11 +552,10 @@ export class Parser {
protected maxLookahead: number
protected ignoredIssues: IgnoredParserIssues
protected outputCst: boolean

// adapters
protected errorMessageProvider: IErrorMessageProvider

protected _input: IToken[] = []
protected inputIdx = -1
protected savedTokenIdx = -1
protected isBackTrackingStack = []
protected className: string
protected RULE_STACK: string[] = []
Expand All @@ -572,6 +580,12 @@ export class Parser {
private LAST_EXPLICIT_RULE_STACK: number[] = []
private selfAnalysisDone = false

// lexerState
private tokVector: IToken[]
private tokVectorLength
private currIdx: number = -1
private savedLexerState: number

/**
* Only used internally for storing productions as they are built for the first time.
* The final productions should be accessed from the static cache.
Expand All @@ -586,7 +600,7 @@ export class Parser {
| IMultiModeLexerDefinition,
config: IParserConfig = DEFAULT_PARSER_CONFIG
) {
this._input = input
this.input = input

// configuration
this.recoveryEnabled = has(config, "recoveryEnabled")
Expand Down Expand Up @@ -717,15 +731,6 @@ export class Parser {
this._errors = newErrors
}

public set input(newInput: IToken[]) {
this.reset()
this._input = newInput
}

public get input(): IToken[] {
return cloneArr(this._input)
}

/**
* Resets the parser state, should be overridden for custom parsers which "carry" additional state.
* When overriding, remember to also invoke the super implementation!
Expand All @@ -735,7 +740,6 @@ export class Parser {

this.isBackTrackingStack = []
this.errors = []
this._input = []
this.RULE_STACK = []
this.LAST_EXPLICIT_RULE_STACK = []
this.CST_STACK = []
Expand Down Expand Up @@ -900,19 +904,6 @@ export class Parser {
}
}

// skips a token and returns the next token
protected SKIP_TOKEN(): IToken {
// example: assume 45 tokens in the input, if input index is 44 it means that NEXT_TOKEN will return
// input[45] which is the 46th item and no longer exists,
// so in this case the largest valid input index is 43 (input.length - 2 )
if (this.inputIdx <= this._input.length - 2) {
this.consumeToken()
return this.LA(1)
} else {
return END_OF_FILE
}
}

// Parsing DSL
/**
* Convenience method equivalent to CONSUME1.
Expand Down Expand Up @@ -1831,57 +1822,14 @@ export class Parser {
}
}

/**
* Convenience method equivalent to LA(1)
* It is no longer used directly in chevrotain due to
* performance considerations (avoid the need for inlining optimizations).
*
* But it is maintained for backward compatibility reasons.
*
* @deprecated
*/
protected NEXT_TOKEN(): IToken {
return this.LA(1)
}

// Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
// or lexers dependent on parser context.
protected LA(howMuch: number): IToken {
if (this._input.length <= this.inputIdx + howMuch) {
return END_OF_FILE
} else {
return this._input[this.inputIdx + howMuch]
}
}

protected consumeToken() {
this.inputIdx++
}

protected saveLexerState() {
this.savedTokenIdx = this.inputIdx
}

protected restoreLexerState() {
this.inputIdx = this.savedTokenIdx
}

protected resetLexerState(): void {
this.inputIdx = -1
}

protected moveLexerStateToEnd(): void {
this.inputIdx = this.input.length - 1
}

// other functionality
private saveRecogState(): IParserState {
// errors is a getter which will clone the errors array
let savedErrors = this.errors
let savedRuleStack = cloneArr(this.RULE_STACK)
return {
errors: savedErrors,
lexerState: this.inputIdx,
lexerState: this.exportLexerState(),
RULE_STACK: savedRuleStack,
CST_STACK: this.CST_STACK,
LAST_EXPLICIT_RULE_STACK: this.LAST_EXPLICIT_RULE_STACK
Expand All @@ -1890,7 +1838,7 @@ export class Parser {

private reloadRecogState(newState: IParserState) {
this.errors = newState.errors
this.inputIdx = newState.lexerState
this.importLexerState(newState.lexerState)
this.RULE_STACK = newState.RULE_STACK
}

Expand Down Expand Up @@ -2004,7 +1952,7 @@ export class Parser {
}
} else if (isFirstInvokedRule) {
// otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case
this.moveLexerStateToEnd()
this.moveToTerminatedState()
// the parser should never throw one of its own errors outside its flow.
// even if error recovery is disabled
return recoveryValueFunc()
Expand Down Expand Up @@ -3291,6 +3239,80 @@ export class Parser {
ruleCstResult
)
}

// lexer related methods
// Assigning a new token vector invalidates all previous parsing state,
// so a full reset is performed before the new input is stored.
public set input(tokenVector: IToken[]) {
    this.reset()
    this.tokVector = tokenVector
    // the length is cached for fast repeated access in the LA() hot path
    this.tokVectorLength = tokenVector.length
}

// Exposes the current token vector.
// NOTE(review): unlike the previous implementation this no longer returns a
// clone — callers receive a live reference and could mutate parser state.
// Presumably intentional for performance; confirm callers treat it as read-only.
public get input(): IToken[] {
return this.tokVector
}

// Skips (consumes) the current token and returns the one after it.
// Consistency fix: use the cached tokVectorLength (as LA() does) instead of
// re-reading this.tokVector.length on every call.
protected SKIP_TOKEN(): IToken {
    // example: with 45 tokens, the largest index from which we may still
    // consume and then look at a real token is 43 (length - 2); beyond that
    // there is nothing left to return but the EOF sentinel.
    if (this.currIdx <= this.tokVectorLength - 2) {
        this.consumeToken()
        return this.LA(1)
    } else {
        return END_OF_FILE
    }
}

/**
 * Convenience method equivalent to LA(1).
 * It is no longer used directly in chevrotain due to
 * performance considerations (avoid the need for inlining optimizations).
 *
 * But it is maintained for backward compatibility reasons.
 *
 * @deprecated Use LA(1) instead.
 * @returns the next token without consuming it (END_OF_FILE past the end).
 */
protected NEXT_TOKEN(): IToken {
return this.LA(1)
}

// Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
// or lexers dependent on parser context.
protected LA(howMuch: number): IToken {
    const lookaheadIdx = this.currIdx + howMuch
    // tokVectorLength is cached on assignment of the input to avoid a
    // property-chain lookup in this very hot code path.
    return lookaheadIdx < this.tokVectorLength
        ? this.tokVector[lookaheadIdx]
        : END_OF_FILE
}

// Advances the lexer state by exactly one token.
protected consumeToken() {
    this.currIdx += 1
}

// Snapshots the current lexer position as an opaque value that can later be
// passed to importLexerState. Overridable for custom lexer adapters.
protected exportLexerState(): number {
return this.currIdx
}

// Restores a lexer position previously produced by exportLexerState.
protected importLexerState(newState: number) {
this.currIdx = newState
}

// TODO: use export/import to describe save/restore?
// Saves the current position into a single internal slot (not a stack);
// a later saveLexerState overwrites any previously saved position.
protected saveLexerState() {
this.savedLexerState = this.currIdx
}

// Rewinds to the position captured by the most recent saveLexerState call.
protected restoreLexerState() {
this.currIdx = this.savedLexerState
}

// Resets the position to before the first token (-1), so LA(1) yields token 0.
protected resetLexerState(): void {
this.currIdx = -1
}

// Moves the lexer to the last token, so only the EOF sentinel remains ahead.
// NOTE(review): missing an access modifier (defaults to public) while all
// sibling lexer-state methods are protected — confirm this is intentional.
moveToTerminatedState(): void {
this.currIdx = this.tokVector.length - 1
}
}

function InRuleRecoveryException(message: string) {
Expand Down

0 comments on commit 7ab67a3

Please sign in to comment.