-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extend Lexer interface to expose diagnostics and data as a lexing report #1668
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,13 @@ import { isAssignment, isCrossReference, isKeyword } from '../languages/generate | |
import { getExplicitRuleType, isDataTypeRule } from '../utils/grammar-utils.js'; | ||
import { assignMandatoryProperties, getContainerOfType, linkContentToContainer } from '../utils/ast-utils.js'; | ||
import { CstNodeBuilder } from './cst-node-builder.js'; | ||
import type { LexingReport } from './token-builder.js'; | ||
|
||
export type ParseResult<T = AstNode> = { | ||
value: T, | ||
parserErrors: IRecognitionException[], | ||
lexerErrors: ILexingError[] | ||
lexerErrors: ILexingError[], | ||
lexerReport?: LexingReport | ||
Comment on lines
+27
to
+28
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These 2 can also be merged, with one being marked as deprecated, as mentioned in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I didn't wanna mark parts of the API as deprecated and @msujew already mentioned that he may re-work those parts when he does a 4.x release. But I fully agree that this is an odd API now. |
||
} | ||
|
||
export const DatatypeSymbol = Symbol('Datatype'); | ||
|
@@ -240,6 +242,7 @@ export class LangiumParser extends AbstractLangiumParser { | |
return { | ||
value: result, | ||
lexerErrors: lexerResult.errors, | ||
lexerReport: lexerResult.report, | ||
parserErrors: this.wrapper.errors | ||
}; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import type { ILexingError, IMultiModeLexerDefinition, IToken, TokenType, TokenTypeDictionary, TokenVocabulary } from 'chevrotain'; | ||
import type { LangiumCoreServices } from '../services.js'; | ||
import { Lexer as ChevrotainLexer } from 'chevrotain'; | ||
import type { LexingReport, TokenBuilder } from './token-builder.js'; | ||
|
||
export interface LexerResult { | ||
/** | ||
|
@@ -21,6 +22,7 @@ export interface LexerResult { | |
*/ | ||
hidden: IToken[]; | ||
errors: ILexingError[]; | ||
report?: LexingReport; | ||
Comment on lines
24
to
+25
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Personally, I would mark |
||
} | ||
|
||
export interface Lexer { | ||
|
@@ -31,10 +33,12 @@ export interface Lexer { | |
export class DefaultLexer implements Lexer { | ||
|
||
protected chevrotainLexer: ChevrotainLexer; | ||
protected tokenBuilder: TokenBuilder; | ||
protected tokenTypes: TokenTypeDictionary; | ||
|
||
constructor(services: LangiumCoreServices) { | ||
const tokens = services.parser.TokenBuilder.buildTokens(services.Grammar, { | ||
constructor( services: LangiumCoreServices) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the extra space after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, this was a pure oversight ;-) Thanks for cacthing that! |
||
this.tokenBuilder = services.parser.TokenBuilder; | ||
const tokens = this.tokenBuilder.buildTokens(services.Grammar, { | ||
caseInsensitive: services.LanguageMetaData.caseInsensitive | ||
}); | ||
this.tokenTypes = this.toTokenTypeDictionary(tokens); | ||
|
@@ -53,7 +57,8 @@ export class DefaultLexer implements Lexer { | |
return { | ||
tokens: chevrotainResult.tokens, | ||
errors: chevrotainResult.errors, | ||
hidden: chevrotainResult.groups.hidden ?? [] | ||
hidden: chevrotainResult.groups.hidden ?? [], | ||
report: this.tokenBuilder.popLexingReport?.(text) | ||
}; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
* terms of the MIT License, which is available in the project root. | ||
******************************************************************************/ | ||
|
||
import type { CustomPatternMatcherFunc, TokenPattern, TokenType, TokenVocabulary } from 'chevrotain'; | ||
import type { CustomPatternMatcherFunc, ILexingError, TokenPattern, TokenType, TokenVocabulary } from 'chevrotain'; | ||
import type { AbstractRule, Grammar, Keyword, TerminalRule } from '../languages/generated/ast.js'; | ||
import type { Stream } from '../utils/stream.js'; | ||
import { Lexer } from 'chevrotain'; | ||
|
@@ -20,9 +20,31 @@ export interface TokenBuilderOptions { | |
|
||
export interface TokenBuilder { | ||
buildTokens(grammar: Grammar, options?: TokenBuilderOptions): TokenVocabulary; | ||
/** | ||
* Produces a lexing report for the given text that was just tokenized using the tokens provided by this builder. | ||
* | ||
* @param text The text that was tokenized. | ||
*/ | ||
popLexingReport?(text: string): LexingReport; | ||
} | ||
|
||
/** | ||
* A custom lexing report that can be produced by the token builder during the lexing process. | ||
* Adopters need to ensure that the any custom fields are serializable so they can be sent across worker threads. | ||
*/ | ||
export interface LexingReport { | ||
diagnostics: LexingDiagnostic[]; | ||
} | ||
|
||
export interface LexingDiagnostic extends ILexingError { | ||
severity?: 'error' | 'warning' | 'info' | 'hint'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably have some global There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that might be a good idea, I'll look into it. |
||
} | ||
|
||
export class DefaultTokenBuilder implements TokenBuilder { | ||
/** | ||
* The list of diagnostics stored during the lexing process of a single text. | ||
*/ | ||
protected diagnostics: LexingDiagnostic[] = []; | ||
|
||
buildTokens(grammar: Grammar, options?: TokenBuilderOptions): TokenVocabulary { | ||
const reachableRules = stream(getAllReachableRules(grammar, false)); | ||
|
@@ -42,6 +64,16 @@ export class DefaultTokenBuilder implements TokenBuilder { | |
return tokens; | ||
} | ||
|
||
popLexingReport(_text: string): LexingReport { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a name like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, the difficulty of good naming. I was debating over this and I previously had There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that makes sense, but |
||
return { diagnostics: this.popDiagnostics() }; | ||
} | ||
|
||
protected popDiagnostics(): LexingDiagnostic[] { | ||
const diagnostics = [...this.diagnostics]; | ||
this.diagnostics = []; | ||
return diagnostics; | ||
} | ||
|
||
protected buildTerminalTokens(rules: Stream<AbstractRule>): TokenType[] { | ||
return rules.filter(isTerminalRule).filter(e => !e.fragment) | ||
.map(terminal => this.buildTerminalToken(terminal)).toArray(); | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -18,6 +18,7 @@ import { streamAst } from '../utils/ast-utils.js'; | |||||
import { tokenToRange } from '../utils/cst-utils.js'; | ||||||
import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js'; | ||||||
import { diagnosticData } from './validation-registry.js'; | ||||||
import type { LexingDiagnostic } from '../parser/token-builder.js'; | ||||||
|
||||||
export interface ValidationOptions { | ||||||
/** | ||||||
|
@@ -97,21 +98,23 @@ export class DefaultDocumentValidator implements DocumentValidator { | |||||
} | ||||||
|
||||||
protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void { | ||||||
for (const lexerError of parseResult.lexerErrors) { | ||||||
const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[]; | ||||||
for (const lexerDiagnostic of lexerDiagnostics) { | ||||||
const severity = lexerDiagnostic?.severity ?? 'error'; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the diagnostic element itself can be undefined
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely correct! |
||||||
const diagnostic: Diagnostic = { | ||||||
severity: toDiagnosticSeverity('error'), | ||||||
severity: toDiagnosticSeverity(severity), | ||||||
range: { | ||||||
start: { | ||||||
line: lexerError.line! - 1, | ||||||
character: lexerError.column! - 1 | ||||||
line: lexerDiagnostic.line! - 1, | ||||||
character: lexerDiagnostic.column! - 1 | ||||||
}, | ||||||
end: { | ||||||
line: lexerError.line! - 1, | ||||||
character: lexerError.column! + lexerError.length - 1 | ||||||
line: lexerDiagnostic.line! - 1, | ||||||
character: lexerDiagnostic.column! + lexerDiagnostic.length - 1 | ||||||
} | ||||||
}, | ||||||
message: lexerError.message, | ||||||
data: diagnosticData(DocumentValidator.LexingError), | ||||||
message: lexerDiagnostic.message, | ||||||
data: toDiagnosticData(severity), | ||||||
source: this.getSource() | ||||||
}; | ||||||
diagnostics.push(diagnostic); | ||||||
|
@@ -245,8 +248,26 @@ export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'h | |||||
} | ||||||
} | ||||||
|
||||||
export function toDiagnosticData(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticData { | ||||||
switch (severity) { | ||||||
case 'error': | ||||||
return diagnosticData(DocumentValidator.LexingError); | ||||||
case 'warning': | ||||||
return diagnosticData(DocumentValidator.LexingWarning); | ||||||
case 'info': | ||||||
return diagnosticData(DocumentValidator.LexingInfo); | ||||||
case 'hint': | ||||||
return diagnosticData(DocumentValidator.LexingHint); | ||||||
Comment on lines
+252
to
+260
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally prefer a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I aligned this with the |
||||||
default: | ||||||
throw new Error('Invalid diagnostic severity: ' + severity); | ||||||
} | ||||||
} | ||||||
|
||||||
export namespace DocumentValidator { | ||||||
export const LexingError = 'lexing-error'; | ||||||
export const LexingWarning = 'lexing-warning'; | ||||||
export const LexingInfo = 'lexing-info'; | ||||||
export const LexingHint = 'lexing-hint'; | ||||||
export const ParsingError = 'parsing-error'; | ||||||
export const LinkingError = 'linking-error'; | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also update the JSDoc with the new parameters?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point!