Skip to content

Commit

Permalink
WIP custom lexer adapters.
Browse files Browse the repository at this point in the history
fixes #528
  • Loading branch information
Shahar Soel authored and bd82 committed Jul 9, 2017
1 parent 64cc6ea commit 7ab67a3
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 74 deletions.
3 changes: 3 additions & 0 deletions lexer_adapter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* parser.input method
* Maybe the parser should not be able to accept a tokenVector array in the constructor?
* The input should be `any` (or a generic `T` argument?), not specific to a tokenVector.
170 changes: 96 additions & 74 deletions src/parse/parser_public.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,16 @@ export type IgnoredRuleIssues = { [dslNameAndOccurrence: string]: boolean }
export type IgnoredParserIssues = { [ruleName: string]: IgnoredRuleIssues }

const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException"
const END_OF_FILE = createTokenInstance(EOF, "", NaN, NaN, NaN, NaN, NaN, NaN)
// Sentinel token returned when looking ahead past the end of the token vector.
// Frozen so no consumer can accidentally mutate the shared singleton.
export const END_OF_FILE = createTokenInstance(EOF, "", NaN, NaN, NaN, NaN, NaN, NaN)
Object.freeze(END_OF_FILE)

export type TokenMatcher = (
Expand Down Expand Up @@ -543,11 +552,10 @@ export class Parser {
protected maxLookahead: number
protected ignoredIssues: IgnoredParserIssues
protected outputCst: boolean

// adapters
protected errorMessageProvider: IErrorMessageProvider

protected _input: IToken[] = []
protected inputIdx = -1
protected savedTokenIdx = -1
protected isBackTrackingStack = []
protected className: string
protected RULE_STACK: string[] = []
Expand All @@ -572,6 +580,12 @@ export class Parser {
private LAST_EXPLICIT_RULE_STACK: number[] = []
private selfAnalysisDone = false

// lexerState
private tokVector: IToken[]
private tokVectorLength
private currIdx: number = -1
private savedLexerState: number

/**
* Only used internally for storing productions as they are built for the first time.
* The final productions should be accessed from the static cache.
Expand All @@ -586,7 +600,7 @@ export class Parser {
| IMultiModeLexerDefinition,
config: IParserConfig = DEFAULT_PARSER_CONFIG
) {
this._input = input
this.input = input

// configuration
this.recoveryEnabled = has(config, "recoveryEnabled")
Expand Down Expand Up @@ -717,15 +731,6 @@ export class Parser {
this._errors = newErrors
}

public set input(newInput: IToken[]) {
this.reset()
this._input = newInput
}

public get input(): IToken[] {
return cloneArr(this._input)
}

/**
* Resets the parser state, should be overridden for custom parsers which "carry" additional state.
* When overriding, remember to also invoke the super implementation!
Expand All @@ -735,7 +740,6 @@ export class Parser {

this.isBackTrackingStack = []
this.errors = []
this._input = []
this.RULE_STACK = []
this.LAST_EXPLICIT_RULE_STACK = []
this.CST_STACK = []
Expand Down Expand Up @@ -900,19 +904,6 @@ export class Parser {
}
}

// skips a token and returns the next token
protected SKIP_TOKEN(): IToken {
// example: assume 45 tokens in the input, if input index is 44 it means that NEXT_TOKEN will return
// input[45] which is the 46th item and no longer exists,
// so in this case the largest valid input index is 43 (input.length - 2 )
if (this.inputIdx <= this._input.length - 2) {
this.consumeToken()
return this.LA(1)
} else {
return END_OF_FILE
}
}

// Parsing DSL
/**
* Convenience method equivalent to CONSUME1.
Expand Down Expand Up @@ -1831,57 +1822,14 @@ export class Parser {
}
}

/**
* Convenience method equivalent to LA(1)
* It is no longer used directly in chevrotain due to
* performance considerations (avoid the need for inlining optimizations).
*
* But it is maintained for backward compatibility reasons.
*
* @deprecated
*/
protected NEXT_TOKEN(): IToken {
return this.LA(1)
}

// Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
// or lexers dependent on parser context.
protected LA(howMuch: number): IToken {
if (this._input.length <= this.inputIdx + howMuch) {
return END_OF_FILE
} else {
return this._input[this.inputIdx + howMuch]
}
}

protected consumeToken() {
this.inputIdx++
}

protected saveLexerState() {
this.savedTokenIdx = this.inputIdx
}

protected restoreLexerState() {
this.inputIdx = this.savedTokenIdx
}

protected resetLexerState(): void {
this.inputIdx = -1
}

protected moveLexerStateToEnd(): void {
this.inputIdx = this.input.length - 1
}

// other functionality
private saveRecogState(): IParserState {
// errors is a getter which will clone the errors array
let savedErrors = this.errors
let savedRuleStack = cloneArr(this.RULE_STACK)
return {
errors: savedErrors,
lexerState: this.inputIdx,
lexerState: this.exportLexerState(),
RULE_STACK: savedRuleStack,
CST_STACK: this.CST_STACK,
LAST_EXPLICIT_RULE_STACK: this.LAST_EXPLICIT_RULE_STACK
Expand All @@ -1890,7 +1838,7 @@ export class Parser {

private reloadRecogState(newState: IParserState) {
this.errors = newState.errors
this.inputIdx = newState.lexerState
this.importLexerState(newState.lexerState)
this.RULE_STACK = newState.RULE_STACK
}

Expand Down Expand Up @@ -2004,7 +1952,7 @@ export class Parser {
}
} else if (isFirstInvokedRule) {
// otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case
this.moveLexerStateToEnd()
this.moveToTerminatedState()
// the parser should never throw one of its own errors outside its flow.
// even if error recovery is disabled
return recoveryValueFunc()
Expand Down Expand Up @@ -3291,6 +3239,80 @@ export class Parser {
ruleCstResult
)
}

// lexer related methods
// Assigning a new token vector invalidates all previous parsing state,
// so a full reset is performed before the new input is stored.
public set input(tokenVector: IToken[]) {
    this.reset()
    this.tokVector = tokenVector
    // the length is cached for fast repeated access in the LA() hot path
    this.tokVectorLength = tokenVector.length
}

// Exposes the current token vector.
// NOTE(review): unlike the previous implementation this no longer returns a
// clone — callers receive a live reference and could mutate parser state.
// Presumably intentional for performance; confirm callers treat it as read-only.
public get input(): IToken[] {
return this.tokVector
}

// Skips (consumes) the current token and returns the one after it.
// Consistency fix: use the cached tokVectorLength (as LA() does) instead of
// re-reading this.tokVector.length on every call.
protected SKIP_TOKEN(): IToken {
    // example: with 45 tokens, the largest index from which we may still
    // consume and then look at a real token is 43 (length - 2); beyond that
    // there is nothing left to return but the EOF sentinel.
    if (this.currIdx <= this.tokVectorLength - 2) {
        this.consumeToken()
        return this.LA(1)
    } else {
        return END_OF_FILE
    }
}

/**
 * Convenience method equivalent to LA(1).
 * It is no longer used directly in chevrotain due to
 * performance considerations (avoid the need for inlining optimizations).
 *
 * But it is maintained for backward compatibility reasons.
 *
 * @deprecated Use LA(1) instead.
 * @returns the next token without consuming it (END_OF_FILE past the end).
 */
protected NEXT_TOKEN(): IToken {
return this.LA(1)
}

// Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
// or lexers dependent on parser context.
protected LA(howMuch: number): IToken {
    const lookaheadIdx = this.currIdx + howMuch
    // tokVectorLength is cached on assignment of the input to avoid a
    // property-chain lookup in this very hot code path.
    return lookaheadIdx < this.tokVectorLength
        ? this.tokVector[lookaheadIdx]
        : END_OF_FILE
}

// Advances the lexer state by exactly one token.
protected consumeToken() {
    this.currIdx += 1
}

// Snapshots the current lexer position as an opaque value that can later be
// passed to importLexerState. Overridable for custom lexer adapters.
protected exportLexerState(): number {
return this.currIdx
}

// Restores a lexer position previously produced by exportLexerState.
protected importLexerState(newState: number) {
this.currIdx = newState
}

// TODO: use export/import to describe save/restore?
// Saves the current position into a single internal slot (not a stack);
// a later saveLexerState overwrites any previously saved position.
protected saveLexerState() {
this.savedLexerState = this.currIdx
}

// Rewinds to the position captured by the most recent saveLexerState call.
protected restoreLexerState() {
this.currIdx = this.savedLexerState
}

// Resets the position to before the first token (-1), so LA(1) yields token 0.
protected resetLexerState(): void {
this.currIdx = -1
}

// Moves the lexer to the last token, so only the EOF sentinel remains ahead.
// NOTE(review): missing an access modifier (defaults to public) while all
// sibling lexer-state methods are protected — confirm this is intentional.
moveToTerminatedState(): void {
this.currIdx = this.tokVector.length - 1
}
}

function InRuleRecoveryException(message: string) {
Expand Down

0 comments on commit 7ab67a3

Please sign in to comment.