Introduce tokenizing options for full and partial mode #1669

Merged · 3 commits · Sep 6, 2024
39 changes: 20 additions & 19 deletions packages/langium/src/parser/indentation-aware.ts
@@ -7,11 +7,11 @@
import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain';
import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
import type { LexingReport, TokenBuilderOptions } from './token-builder.js';
- import type { LexerResult } from './lexer.js';
+ import type { LexerResult, TokenizeOptions } from './lexer.js';
import type { LangiumCoreServices } from '../services.js';
import { createToken, createTokenInstance, Lexer } from 'chevrotain';
import { DefaultTokenBuilder } from './token-builder.js';
- import { DefaultLexer, isTokenTypeArray } from './lexer.js';
+ import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';

type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];

@@ -179,11 +179,11 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
}
}

- override popLexingReport(text: string): IndentationLexingReport {
- const result = super.popLexingReport(text);
+ override flushLexingReport(text: string): IndentationLexingReport {
+ const result = super.flushLexingReport(text);
return {
...result,
- remainingDedents: this.popRemainingDedents(text),
+ remainingDedents: this.flushRemainingDedents(text),
};
}

@@ -203,9 +203,12 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
*
* @param text The full input string.
* @param offset The current position at which to attempt a match
+ * @param tokens Previously scanned tokens
+ * @param groups Token Groups
* @returns The current and previous indentation levels and the matched whitespace
*/
- protected matchWhitespace(text: string, offset: number, _tokens: IToken[], _groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+ protected matchWhitespace(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
this.whitespaceRegExp.lastIndex = offset;
const match = this.whitespaceRegExp.exec(text);
return {
@@ -251,12 +254,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
*
* @param text The full input string.
* @param offset The offset at which to attempt a match
- * @param tokens Previously scanned Tokens
+ * @param tokens Previously scanned tokens
* @param groups Token Groups
*/
protected indentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
- const { indentTokenName } = this.options;
-
if (!this.isStartOfLine(text, offset)) {
return null;
}
@@ -274,7 +275,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
const indentToken = this.createIndentationTokenInstance(
this.indentTokenType,
text,
- match?.[0] ?? indentTokenName,
+ match?.[0] ?? '',
offset,
);
tokens.push(indentToken);
@@ -288,12 +289,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
*
* @param text The full input string.
* @param offset The offset at which to attempt a match
- * @param tokens Previously scanned Tokens
+ * @param tokens Previously scanned tokens
* @param groups Token Groups
*/
protected dedentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
- const { dedentTokenName } = this.options;
-
if (!this.isStartOfLine(text, offset)) {
return null;
}
@@ -316,7 +315,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
offset,
length: match?.[0]?.length ?? 0,
line: this.getLineNumber(text, offset),
- column: 0
+ column: 1
});
return null;
}
@@ -327,7 +326,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
const token = this.createIndentationTokenInstance(
this.dedentTokenType,
text,
- match?.[0] ?? dedentTokenName,
+ match?.[0] ?? '',
offset,
);
tokens.push(token);
@@ -362,7 +361,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
* @param text Full text that was tokenized
* @returns Remaining dedent tokens to match all previous indents at the end of the file
*/
- popRemainingDedents(text: string): IToken[] {
+ flushRemainingDedents(text: string): IToken[] {
const remainingDedents: IToken[] = [];
while (this.indentationStack.length > 1) {
remainingDedents.push(
@@ -402,13 +401,15 @@ export class IndentationAwareLexer extends DefaultLexer {
}
}

- override tokenize(text: string): LexerResult {
+ override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const result = super.tokenize(text);

// consuming all remaining dedents and remove them as they might not be serializable
const report = result.report as IndentationLexingReport;
- const remainingDedents = report.remainingDedents;
- result.tokens.push(...remainingDedents);
+ if (options?.mode === 'full') {
+     // auto-complete document with remaining dedents
+     result.tokens.push(...report.remainingDedents);
+ }
report.remainingDedents = [];

// remove any "indent-dedent" pair with an empty body as these are typically
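With these changes, `IndentationAwareLexer` only appends the synthetic dedents still open at end of input when tokenizing in `'full'` mode; in `'partial'` mode they stay in the lexing report. A minimal sketch of the observable difference, assuming an indentation-sensitive grammar wired up with the `IndentationAwareTokenBuilder`/`IndentationAwareLexer` pair as in the tests further below (`grammar` and `text` are placeholders):

```ts
import { IndentationAwareLexer, IndentationAwareTokenBuilder } from 'langium';
import { createServicesForGrammar } from 'langium/grammar';

// Placeholder: any grammar using synthetic INDENT/DEDENT terminals.
declare const grammar: string;

const services = await createServicesForGrammar({
    grammar,
    module: {
        parser: {
            TokenBuilder: () => new IndentationAwareTokenBuilder(),
            Lexer: services => new IndentationAwareLexer(services)
        }
    }
});

// Input that ends while an indented block is still open.
const text = 'if true:\n    return true';
// 'full' (the default) closes the open block with a trailing DEDENT token.
const full = services.parser.Lexer.tokenize(text);
// 'partial' leaves the block open-ended, as suits an incomplete document.
const partial = services.parser.Lexer.tokenize(text, { mode: 'partial' });
console.log(full.tokens.length - partial.tokens.length); // expected: 1 under these assumptions
```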
2 changes: 1 addition & 1 deletion packages/langium/src/parser/langium-parser.ts
@@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {

parse(input: string): CompletionParserResult {
this.resetState();
- const tokens = this.lexer.tokenize(input);
+ const tokens = this.lexer.tokenize(input, { mode: 'partial' });
this.tokens = tokens.tokens;
this.wrapper.input = [...this.tokens];
this.mainRule.call(this.wrapper, {});
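The completion parser lexes only the text up to the completion offset, i.e. a document prefix rather than a complete document, so `'partial'` mode keeps an indentation-aware lexer from appending end-of-file dedents behind the cursor. A hedged sketch of that call pattern (the helper and its names are illustrative, not Langium API):

```ts
import type { Lexer, LexerResult } from 'langium';

// Illustrative helper: tokenize only the text before the cursor for completion.
function tokenizePrefix(lexer: Lexer, fullText: string, cursorOffset: number): LexerResult {
    const prefix = fullText.substring(0, cursorOffset);
    // 'partial' signals an incomplete document, so the lexer should not
    // synthesize tokens that only make sense at a real end of file.
    return lexer.tokenize(prefix, { mode: 'partial' });
}
```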
16 changes: 12 additions & 4 deletions packages/langium/src/parser/lexer.ts
@@ -25,9 +25,17 @@ export interface LexerResult {
report?: LexingReport;
}

+ export type TokenizeMode = 'full' | 'partial';
+
+ export interface TokenizeOptions {
+     mode?: TokenizeMode;
+ }
+
+ export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };

export interface Lexer {
readonly definition: TokenTypeDictionary;
- tokenize(text: string): LexerResult;
+ tokenize(text: string, options?: TokenizeOptions): LexerResult;
}

export class DefaultLexer implements Lexer {
@@ -36,7 +44,7 @@ export class DefaultLexer implements Lexer {
protected tokenBuilder: TokenBuilder;
protected tokenTypes: TokenTypeDictionary;

- constructor( services: LangiumCoreServices) {
+ constructor(services: LangiumCoreServices) {
this.tokenBuilder = services.parser.TokenBuilder;
const tokens = this.tokenBuilder.buildTokens(services.Grammar, {
caseInsensitive: services.LanguageMetaData.caseInsensitive
@@ -52,13 +60,13 @@ export class DefaultLexer implements Lexer {
return this.tokenTypes;
}

- tokenize(text: string): LexerResult {
+ tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
const chevrotainResult = this.chevrotainLexer.tokenize(text);
return {
tokens: chevrotainResult.tokens,
errors: chevrotainResult.errors,
hidden: chevrotainResult.groups.hidden ?? [],
- report: this.tokenBuilder.popLexingReport?.(text)
+ report: this.tokenBuilder.flushLexingReport?.(text)
};
}

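`DefaultLexer` accepts the new options parameter but deliberately ignores it (hence `_options`); the widened `Lexer` signature exists so that subclasses can branch on the mode. A sketch of a custom lexer honoring that contract, assuming `TokenizeOptions` and `DEFAULT_TOKENIZE_OPTIONS` are re-exported from the `langium` entry point like the other parser services:

```ts
import { DefaultLexer, DEFAULT_TOKENIZE_OPTIONS } from 'langium';
import type { LexerResult, TokenizeOptions } from 'langium';

// Sketch: a lexer that performs end-of-file bookkeeping only for complete documents.
class EofAwareLexer extends DefaultLexer {
    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
        const result = super.tokenize(text, options);
        if (options.mode !== 'partial') {
            // Hypothetical: append or validate tokens that only exist at a true EOF,
            // e.g. closing synthetic block tokens, as IndentationAwareLexer does.
        }
        return result;
    }
}
```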
9 changes: 6 additions & 3 deletions packages/langium/src/parser/token-builder.ts
@@ -25,7 +25,7 @@ export interface TokenBuilder {
*
* @param text The text that was tokenized.
*/
- popLexingReport?(text: string): LexingReport;
+ flushLexingReport?(text: string): LexingReport;
}

/**
@@ -36,8 +36,10 @@ export interface LexingReport {
diagnostics: LexingDiagnostic[];
}

+ export type LexingDiagnosticSeverity = 'error' | 'warning' | 'info' | 'hint';
+
export interface LexingDiagnostic extends ILexingError {
- severity?: 'error' | 'warning' | 'info' | 'hint';
+ severity?: LexingDiagnosticSeverity;
}

export class DefaultTokenBuilder implements TokenBuilder {
@@ -64,7 +66,8 @@ export class DefaultTokenBuilder implements TokenBuilder {
return tokens;
}

- popLexingReport(_text: string): LexingReport {
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+ flushLexingReport(text: string): LexingReport {
return { diagnostics: this.popDiagnostics() };
}

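The rename to `flushLexingReport` (previously `popLexingReport`) and the extracted `LexingDiagnosticSeverity` type are what the document validator below consumes. A sketch of a custom token builder emitting a non-error diagnostic; the tab-discouraging rule and the fixed positions are invented for illustration:

```ts
import { DefaultTokenBuilder } from 'langium';
import type { LexingReport } from 'langium';

// Sketch: surface a 'warning'-severity lexing diagnostic from a custom token builder.
class TabWarningTokenBuilder extends DefaultTokenBuilder {
    override flushLexingReport(text: string): LexingReport {
        const report = super.flushLexingReport(text);
        const offset = text.indexOf('\t');
        if (offset >= 0) {
            report.diagnostics.push({
                severity: 'warning', // a LexingDiagnosticSeverity value
                message: 'Tab characters are discouraged here; use spaces.',
                offset,
                length: 1,
                line: 1,   // invented positions: a real implementation would
                column: 1  // compute line/column from the offset
            });
        }
        return report;
    }
}
```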
14 changes: 7 additions & 7 deletions packages/langium/src/validation/document-validator.ts
@@ -11,14 +11,14 @@ import type { ParseResult } from '../parser/langium-parser.js';
import type { LangiumCoreServices } from '../services.js';
import type { AstNode, CstNode } from '../syntax-tree.js';
import type { LangiumDocument } from '../workspace/documents.js';
- import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry } from './validation-registry.js';
+ import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry, ValidationSeverity } from './validation-registry.js';
import { CancellationToken } from '../utils/cancellation.js';
import { findNodeForKeyword, findNodeForProperty } from '../utils/grammar-utils.js';
import { streamAst } from '../utils/ast-utils.js';
import { tokenToRange } from '../utils/cst-utils.js';
import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js';
import { diagnosticData } from './validation-registry.js';
- import type { LexingDiagnostic } from '../parser/token-builder.js';
+ import type { LexingDiagnostic, LexingDiagnosticSeverity } from '../parser/token-builder.js';

export interface ValidationOptions {
/**
@@ -100,7 +100,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void {
const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[];
for (const lexerDiagnostic of lexerDiagnostics) {
- const severity = lexerDiagnostic?.severity ?? 'error';
+ const severity = lexerDiagnostic.severity ?? 'error';
const diagnostic: Diagnostic = {
severity: toDiagnosticSeverity(severity),
range: {
@@ -180,7 +180,7 @@ export class DefaultDocumentValidator implements DocumentValidator {

protected async validateAst(rootNode: AstNode, options: ValidationOptions, cancelToken = CancellationToken.None): Promise<Diagnostic[]> {
const validationItems: Diagnostic[] = [];
- const acceptor: ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => {
+ const acceptor: ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => {
validationItems.push(this.toDiagnostic(severity, message, info));
};

Expand All @@ -194,7 +194,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
return validationItems;
}

- protected toDiagnostic<N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N, string>): Diagnostic {
+ protected toDiagnostic<N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N, string>): Diagnostic {
return {
message,
range: getDiagnosticRange(info),
@@ -233,7 +233,7 @@ export function getDiagnosticRange<N extends AstNode>(info: DiagnosticInfo<N, st
return cstNode.range;
}

- export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticSeverity {
+ export function toDiagnosticSeverity(severity: LexingDiagnosticSeverity): DiagnosticSeverity {
switch (severity) {
case 'error':
return 1; // according to vscode-languageserver-types/lib/esm/main.js#DiagnosticSeverity.Error
@@ -248,7 +248,7 @@ export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'h
}
}

- export function toDiagnosticData(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticData {
+ export function toDiagnosticData(severity: LexingDiagnosticSeverity): DiagnosticData {
switch (severity) {
case 'error':
return diagnosticData(DocumentValidator.LexingError);
4 changes: 3 additions & 1 deletion packages/langium/src/validation/validation-registry.ts
@@ -57,7 +57,9 @@ export function diagnosticData(code: string): DiagnosticData {
return { code };
}

- export type ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => void
+ export type ValidationSeverity = 'error' | 'warning' | 'info' | 'hint';
+
+ export type ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => void

export type ValidationCheck<T extends AstNode = AstNode> = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise<void>;

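Extracting `ValidationSeverity` lets downstream code name the severity union instead of repeating the four string literals. A small sketch of a validation check written against the new type (the check itself is illustrative):

```ts
import type { AstNode, ValidationAcceptor, ValidationSeverity } from 'langium';

// Illustrative check: flag a node with a configurable severity.
const severity: ValidationSeverity = 'hint';

function checkNode(node: AstNode, accept: ValidationAcceptor): void {
    accept(severity, 'Example diagnostic using the extracted ValidationSeverity type.', { node });
}
```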
36 changes: 35 additions & 1 deletion packages/langium/test/parser/indentation-aware.test.ts
@@ -11,7 +11,7 @@ import { EmptyFileSystem, IndentationAwareLexer, IndentationAwareTokenBuilder }
import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
import type { LangiumServices, PartialLangiumServices } from 'langium/lsp';
import { expandToString } from 'langium/generate';
- import { parseHelper } from 'langium/test';
+ import { expectCompletion, parseHelper } from 'langium/test';
import type { IMultiModeLexerDefinition } from 'chevrotain';

const grammarServices = createLangiumGrammarServices(EmptyFileSystem).grammar;
@@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => {
expect(dedent.tokenType.name).toBe('DEDENT');
});

+ test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
+     const lexer = await getLexer(sampleGrammar);
+     const { tokens } = lexer.tokenize(expandToString`
+         // single-line comment
+         {
+             name`, { mode: 'partial' });
+     expect(tokens).toHaveLength(3);
+
+     const [/* L_BRAC */, indent, /* id */] = tokens;
+     expect(indent.tokenType.name).toBe('INDENT');
+ });

test('should not return any tokens for empty input', async () => {
const lexer = await getLexer(sampleGrammar);
const { tokens } = lexer.tokenize('');
@@ -389,6 +401,28 @@ describe('IndentationAware parsing', () => {
expect(return2.value).toBe(true);
});

+ test.fails('should offer correct auto-completion parsing', async () => {
+     const text = expandToString`
+         <|>if true:
+             <|>return true
+         <|>else:
+             <|>if false:
+                 <|>return true
+             <|>return false
+         <|>return true
+     `;
+
+     const services = await createIndentationAwareServices(sampleGrammar);
+     const completion = expectCompletion(services);
+     await completion({ text, index: 0, expectedItems: ['if', 'return'] });
+     // PR 1669: the lines below currently fail as the completion provider may wrongly assume that all whitespace tokens are hidden
+     await completion({ text, index: 1, expectedItems: ['if', 'return'] });
+     await completion({ text, index: 2, expectedItems: ['else'] });
+     await completion({ text, index: 3, expectedItems: ['if', 'return'] });
+     await completion({ text, index: 4, expectedItems: ['if', 'return'] });
+     await completion({ text, index: 5, expectedItems: ['if', 'return'] });
+     await completion({ text, index: 6, expectedItems: ['if', 'return'] });
+ });
});

type Statement = If | Return;