diff --git a/packages/langium/src/parser/indentation-aware.ts b/packages/langium/src/parser/indentation-aware.ts index 531fbc76d..3891f585e 100644 --- a/packages/langium/src/parser/indentation-aware.ts +++ b/packages/langium/src/parser/indentation-aware.ts @@ -7,11 +7,11 @@ import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain'; import type { Grammar, TerminalRule } from '../languages/generated/ast.js'; import type { LexingReport, TokenBuilderOptions } from './token-builder.js'; -import type { LexerResult } from './lexer.js'; +import type { LexerResult, TokenizeOptions } from './lexer.js'; import type { LangiumCoreServices } from '../services.js'; import { createToken, createTokenInstance, Lexer } from 'chevrotain'; import { DefaultTokenBuilder } from './token-builder.js'; -import { DefaultLexer, isTokenTypeArray } from './lexer.js'; +import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js'; type IndentationAwareDelimiter = [begin: TokenName, end: TokenName]; @@ -179,11 +179,11 @@ export class IndentationAwareTokenBuilder): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected matchWhitespace(text: string, offset: number, tokens: IToken[], groups: Record): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } { this.whitespaceRegExp.lastIndex = offset; const match = this.whitespaceRegExp.exec(text); return { @@ -251,12 +254,10 @@ export class IndentationAwareTokenBuilder): ReturnType { - const { indentTokenName } = this.options; - if (!this.isStartOfLine(text, offset)) { return null; } @@ -274,7 +275,7 @@ export class IndentationAwareTokenBuilder): ReturnType { - const { dedentTokenName } = this.options; - if (!this.isStartOfLine(text, offset)) { return null; } @@ -316,7 +315,7 @@ export class IndentationAwareTokenBuilder 1) { 
remainingDedents.push( @@ -402,13 +401,15 @@ export class IndentationAwareLexer extends DefaultLexer { } } - override tokenize(text: string): LexerResult { + override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult { const result = super.tokenize(text); // consuming all remaining dedents and remove them as they might not be serializable const report = result.report as IndentationLexingReport; - const remainingDedents = report.remainingDedents; - result.tokens.push(...remainingDedents); + if (options?.mode === 'full') { + // auto-complete document with remaining dedents + result.tokens.push(...report.remainingDedents); + } report.remainingDedents = []; // remove any "indent-dedent" pair with an empty body as these are typically diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index 44292780b..4d431e3aa 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser { parse(input: string): CompletionParserResult { this.resetState(); - const tokens = this.lexer.tokenize(input); + const tokens = this.lexer.tokenize(input, { mode: 'partial' }); this.tokens = tokens.tokens; this.wrapper.input = [...this.tokens]; this.mainRule.call(this.wrapper, {}); diff --git a/packages/langium/src/parser/lexer.ts b/packages/langium/src/parser/lexer.ts index a45109e82..fedfad6fc 100644 --- a/packages/langium/src/parser/lexer.ts +++ b/packages/langium/src/parser/lexer.ts @@ -25,9 +25,17 @@ export interface LexerResult { report?: LexingReport; } +export type TokenizeMode = 'full' | 'partial'; + +export interface TokenizeOptions { + mode?: TokenizeMode; +} + +export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' }; + export interface Lexer { readonly definition: TokenTypeDictionary; - tokenize(text: string): LexerResult; + tokenize(text: 
string, options?: TokenizeOptions): LexerResult; } export class DefaultLexer implements Lexer { @@ -36,7 +44,7 @@ export class DefaultLexer implements Lexer { protected tokenBuilder: TokenBuilder; protected tokenTypes: TokenTypeDictionary; - constructor( services: LangiumCoreServices) { + constructor(services: LangiumCoreServices) { this.tokenBuilder = services.parser.TokenBuilder; const tokens = this.tokenBuilder.buildTokens(services.Grammar, { caseInsensitive: services.LanguageMetaData.caseInsensitive @@ -52,13 +60,13 @@ export class DefaultLexer implements Lexer { return this.tokenTypes; } - tokenize(text: string): LexerResult { + tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult { const chevrotainResult = this.chevrotainLexer.tokenize(text); return { tokens: chevrotainResult.tokens, errors: chevrotainResult.errors, hidden: chevrotainResult.groups.hidden ?? [], - report: this.tokenBuilder.popLexingReport?.(text) + report: this.tokenBuilder.flushLexingReport?.(text) }; } diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 9407c9c71..a2d8c2952 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -25,7 +25,7 @@ export interface TokenBuilder { * * @param text The text that was tokenized. 
*/ - popLexingReport?(text: string): LexingReport; + flushLexingReport?(text: string): LexingReport; } /** @@ -36,8 +36,10 @@ export interface LexingReport { diagnostics: LexingDiagnostic[]; } +export type LexingDiagnosticSeverity = 'error' | 'warning' | 'info' | 'hint'; + export interface LexingDiagnostic extends ILexingError { - severity?: 'error' | 'warning' | 'info' | 'hint'; + severity?: LexingDiagnosticSeverity; } export class DefaultTokenBuilder implements TokenBuilder { @@ -64,7 +66,8 @@ export class DefaultTokenBuilder implements TokenBuilder { return tokens; } - popLexingReport(_text: string): LexingReport { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + flushLexingReport(text: string): LexingReport { return { diagnostics: this.popDiagnostics() }; } diff --git a/packages/langium/src/validation/document-validator.ts b/packages/langium/src/validation/document-validator.ts index 8c4ae0850..804cb4765 100644 --- a/packages/langium/src/validation/document-validator.ts +++ b/packages/langium/src/validation/document-validator.ts @@ -11,14 +11,14 @@ import type { ParseResult } from '../parser/langium-parser.js'; import type { LangiumCoreServices } from '../services.js'; import type { AstNode, CstNode } from '../syntax-tree.js'; import type { LangiumDocument } from '../workspace/documents.js'; -import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry } from './validation-registry.js'; +import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry, ValidationSeverity } from './validation-registry.js'; import { CancellationToken } from '../utils/cancellation.js'; import { findNodeForKeyword, findNodeForProperty } from '../utils/grammar-utils.js'; import { streamAst } from '../utils/ast-utils.js'; import { tokenToRange } from '../utils/cst-utils.js'; import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js'; import { diagnosticData } 
from './validation-registry.js'; -import type { LexingDiagnostic } from '../parser/token-builder.js'; +import type { LexingDiagnostic, LexingDiagnosticSeverity } from '../parser/token-builder.js'; export interface ValidationOptions { /** @@ -100,7 +100,7 @@ export class DefaultDocumentValidator implements DocumentValidator { protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void { const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[]; for (const lexerDiagnostic of lexerDiagnostics) { - const severity = lexerDiagnostic?.severity ?? 'error'; + const severity = lexerDiagnostic.severity ?? 'error'; const diagnostic: Diagnostic = { severity: toDiagnosticSeverity(severity), range: { @@ -180,7 +180,7 @@ export class DefaultDocumentValidator implements DocumentValidator { protected async validateAst(rootNode: AstNode, options: ValidationOptions, cancelToken = CancellationToken.None): Promise { const validationItems: Diagnostic[] = []; - const acceptor: ValidationAcceptor = (severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo) => { + const acceptor: ValidationAcceptor = (severity: ValidationSeverity, message: string, info: DiagnosticInfo) => { validationItems.push(this.toDiagnostic(severity, message, info)); }; @@ -194,7 +194,7 @@ export class DefaultDocumentValidator implements DocumentValidator { return validationItems; } - protected toDiagnostic(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo): Diagnostic { + protected toDiagnostic(severity: ValidationSeverity, message: string, info: DiagnosticInfo): Diagnostic { return { message, range: getDiagnosticRange(info), @@ -233,7 +233,7 @@ export function getDiagnosticRange(info: DiagnosticInfo(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo) => void +export type ValidationSeverity = 
'error' | 'warning' | 'info' | 'hint'; + +export type ValidationAcceptor = (severity: ValidationSeverity, message: string, info: DiagnosticInfo) => void export type ValidationCheck = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise; diff --git a/packages/langium/test/parser/indentation-aware.test.ts b/packages/langium/test/parser/indentation-aware.test.ts index 1b1f51d7a..766e6194f 100644 --- a/packages/langium/test/parser/indentation-aware.test.ts +++ b/packages/langium/test/parser/indentation-aware.test.ts @@ -11,7 +11,7 @@ import { EmptyFileSystem, IndentationAwareLexer, IndentationAwareTokenBuilder } import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar'; import type { LangiumServices, PartialLangiumServices } from 'langium/lsp'; import { expandToString } from 'langium/generate'; -import { parseHelper } from 'langium/test'; +import { expectCompletion, parseHelper } from 'langium/test'; import type { IMultiModeLexerDefinition } from 'chevrotain'; const grammarServices = createLangiumGrammarServices(EmptyFileSystem).grammar; @@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => { expect(dedent.tokenType.name).toBe('DEDENT'); }); + test('should NOT add remaining dedents to the end if partial tokenizing', async () => { + const lexer = await getLexer(sampleGrammar); + const { tokens } = lexer.tokenize(expandToString` + // single-line comment + { + name`, { mode: 'partial' }); + expect(tokens).toHaveLength(3); + + const [/* L_BRAC */, indent, /* id */] = tokens; + expect(indent.tokenType.name).toBe('INDENT'); + }); + test('should not return any tokens for empty input', async () => { const lexer = await getLexer(sampleGrammar); const { tokens } = lexer.tokenize(''); @@ -389,6 +401,28 @@ describe('IndentationAware parsing', () => { expect(return2.value).toBe(true); }); + test.fails('should offer correct auto-completion parsing', async () => { + const text = expandToString` + <|>if true: + 
    <|>return true + <|>else: + <|>if false: + <|>return true + <|>return false + <|>return true + `; + + const services = await createIndentationAwareServices(sampleGrammar); + const completion = expectCompletion(services); + await completion({ text, index: 0, expectedItems: ['if', 'return'] }); + // PR 1669: the lines below currently fail as the completion provider may wrongly assume that all whitespace tokens are hidden + await completion({ text, index: 1, expectedItems: ['if', 'return'] }); + await completion({ text, index: 2, expectedItems: ['else'] }); + await completion({ text, index: 3, expectedItems: ['if', 'return'] }); + await completion({ text, index: 4, expectedItems: ['if', 'return'] }); + await completion({ text, index: 5, expectedItems: ['if', 'return'] }); + await completion({ text, index: 6, expectedItems: ['if', 'return'] }); + }); }); type Statement = If | Return;