From 464d594e385b121050092432529c8c07ccf584f7 Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Fri, 6 Sep 2024 11:25:02 +0200 Subject: [PATCH 1/3] Introduce tokenizing options for full and partial mode Add tokenizing mode to tokenizing method - Full: We get the full text to tokenize - Partial: We get only a portion of the text to tokenize In indentation lexing, we do not auto-complete dedents for partial mode --- packages/langium/src/parser/indentation-aware.ts | 12 +++++++----- packages/langium/src/parser/langium-parser.ts | 2 +- packages/langium/src/parser/lexer.ts | 10 ++++++++-- .../langium/test/parser/indentation-aware.test.ts | 12 ++++++++++++ 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/packages/langium/src/parser/indentation-aware.ts b/packages/langium/src/parser/indentation-aware.ts index 531fbc76d..d4c3abf5e 100644 --- a/packages/langium/src/parser/indentation-aware.ts +++ b/packages/langium/src/parser/indentation-aware.ts @@ -7,11 +7,11 @@ import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain'; import type { Grammar, TerminalRule } from '../languages/generated/ast.js'; import type { LexingReport, TokenBuilderOptions } from './token-builder.js'; -import type { LexerResult } from './lexer.js'; +import type { LexerResult, TokenizeOptions } from './lexer.js'; import type { LangiumCoreServices } from '../services.js'; import { createToken, createTokenInstance, Lexer } from 'chevrotain'; import { DefaultTokenBuilder } from './token-builder.js'; -import { DefaultLexer, isTokenTypeArray } from './lexer.js'; +import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js'; type IndentationAwareDelimiter = [begin: TokenName, end: TokenName]; @@ -402,13 +402,15 @@ export class IndentationAwareLexer extends DefaultLexer { } } - override tokenize(text: string): LexerResult { + override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): 
LexerResult { const result = super.tokenize(text); // consuming all remaining dedents and remove them as they might not be serializable const report = result.report as IndentationLexingReport; - const remainingDedents = report.remainingDedents; - result.tokens.push(...remainingDedents); + if (options?.mode === 'full') { + // auto-complete document with remaining dedents + result.tokens.push(...report.remainingDedents); + } report.remainingDedents = []; // remove any "indent-dedent" pair with an empty body as these are typically diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index 44292780b..4d431e3aa 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser { parse(input: string): CompletionParserResult { this.resetState(); - const tokens = this.lexer.tokenize(input); + const tokens = this.lexer.tokenize(input, { mode: 'partial' }); this.tokens = tokens.tokens; this.wrapper.input = [...this.tokens]; this.mainRule.call(this.wrapper, {}); diff --git a/packages/langium/src/parser/lexer.ts b/packages/langium/src/parser/lexer.ts index a45109e82..bf6c0e299 100644 --- a/packages/langium/src/parser/lexer.ts +++ b/packages/langium/src/parser/lexer.ts @@ -25,9 +25,15 @@ export interface LexerResult { report?: LexingReport; } +export interface TokenizeOptions { + mode: 'full' | 'partial'; +} + +export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' }; + export interface Lexer { readonly definition: TokenTypeDictionary; - tokenize(text: string): LexerResult; + tokenize(text: string, options?: TokenizeOptions): LexerResult; } export class DefaultLexer implements Lexer { @@ -52,7 +58,7 @@ export class DefaultLexer implements Lexer { return this.tokenTypes; } - tokenize(text: string): LexerResult { + tokenize(text: string, _options: TokenizeOptions = 
DEFAULT_TOKENIZE_OPTIONS): LexerResult { const chevrotainResult = this.chevrotainLexer.tokenize(text); return { tokens: chevrotainResult.tokens, diff --git a/packages/langium/test/parser/indentation-aware.test.ts b/packages/langium/test/parser/indentation-aware.test.ts index 1b1f51d7a..bf9683dee 100644 --- a/packages/langium/test/parser/indentation-aware.test.ts +++ b/packages/langium/test/parser/indentation-aware.test.ts @@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => { expect(dedent.tokenType.name).toBe('DEDENT'); }); + test('should NOT add remaining dedents to the end if partial tokenizing', async () => { + const lexer = await getLexer(sampleGrammar); + const { tokens } = lexer.tokenize(expandToString` + // single-line comment + { + name`, { mode: 'partial' }); + expect(tokens).toHaveLength(3); + + const [/* L_BRAC */, indent, /* id */] = tokens; + expect(indent.tokenType.name).toBe('INDENT'); + }); + test('should not return any tokens for empty input', async () => { const lexer = await getLexer(sampleGrammar); const { tokens } = lexer.tokenize(''); From 14abdc4cf1c06c8a20b87dd3fd38ed742eb8514d Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Fri, 6 Sep 2024 13:30:28 +0200 Subject: [PATCH 2/3] Consider PR feedback - Adapt a few method names - Add missing parameter JSDoc - Fix wrong whitespace in Lexer constructor - Introduce specific types for unions - Add completion test - Do not use token names if there is no match as the text length is important for offset calculation --- .../langium/src/parser/indentation-aware.ts | 25 +++++++++---------- packages/langium/src/parser/lexer.ts | 8 +++--- packages/langium/src/parser/token-builder.ts | 9 ++++--- .../src/validation/document-validator.ts | 14 +++++------ .../src/validation/validation-registry.ts | 4 ++- .../test/parser/indentation-aware.test.ts | 23 ++++++++++++++++- 6 files changed, 55 insertions(+), 28 deletions(-) diff --git a/packages/langium/src/parser/indentation-aware.ts 
b/packages/langium/src/parser/indentation-aware.ts index d4c3abf5e..2ff40dec6 100644 --- a/packages/langium/src/parser/indentation-aware.ts +++ b/packages/langium/src/parser/indentation-aware.ts @@ -179,11 +179,11 @@ export class IndentationAwareTokenBuilder): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected matchWhitespace(text: string, offset: number, tokens: IToken[], groups: Record): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } { this.whitespaceRegExp.lastIndex = offset; const match = this.whitespaceRegExp.exec(text); return { @@ -251,12 +254,10 @@ export class IndentationAwareTokenBuilder): ReturnType { - const { indentTokenName } = this.options; - if (!this.isStartOfLine(text, offset)) { return null; } @@ -274,7 +275,7 @@ export class IndentationAwareTokenBuilder): ReturnType { - const { dedentTokenName } = this.options; - if (!this.isStartOfLine(text, offset)) { return null; } @@ -327,7 +326,7 @@ export class IndentationAwareTokenBuilder 1) { remainingDedents.push( diff --git a/packages/langium/src/parser/lexer.ts b/packages/langium/src/parser/lexer.ts index bf6c0e299..28fd8c87a 100644 --- a/packages/langium/src/parser/lexer.ts +++ b/packages/langium/src/parser/lexer.ts @@ -25,8 +25,10 @@ export interface LexerResult { report?: LexingReport; } +export type TokenizeMode = 'full' | 'partial'; + export interface TokenizeOptions { - mode: 'full' | 'partial'; + mode: TokenizeMode; } export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' }; @@ -42,7 +44,7 @@ export class DefaultLexer implements Lexer { protected tokenBuilder: TokenBuilder; protected tokenTypes: TokenTypeDictionary; - constructor( services: LangiumCoreServices) { + constructor(services: LangiumCoreServices) { this.tokenBuilder = services.parser.TokenBuilder; const tokens = this.tokenBuilder.buildTokens(services.Grammar, { 
caseInsensitive: services.LanguageMetaData.caseInsensitive @@ -64,7 +66,7 @@ export class DefaultLexer implements Lexer { tokens: chevrotainResult.tokens, errors: chevrotainResult.errors, hidden: chevrotainResult.groups.hidden ?? [], - report: this.tokenBuilder.popLexingReport?.(text) + report: this.tokenBuilder.flushLexingReport?.(text) }; } diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 9407c9c71..a2d8c2952 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -25,7 +25,7 @@ export interface TokenBuilder { * * @param text The text that was tokenized. */ - popLexingReport?(text: string): LexingReport; + flushLexingReport?(text: string): LexingReport; } /** @@ -36,8 +36,10 @@ export interface LexingReport { diagnostics: LexingDiagnostic[]; } +export type LexingDiagnosticSeverity = 'error' | 'warning' | 'info' | 'hint'; + export interface LexingDiagnostic extends ILexingError { - severity?: 'error' | 'warning' | 'info' | 'hint'; + severity?: LexingDiagnosticSeverity; } export class DefaultTokenBuilder implements TokenBuilder { @@ -64,7 +66,8 @@ export class DefaultTokenBuilder implements TokenBuilder { return tokens; } - popLexingReport(_text: string): LexingReport { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + flushLexingReport(text: string): LexingReport { return { diagnostics: this.popDiagnostics() }; } diff --git a/packages/langium/src/validation/document-validator.ts b/packages/langium/src/validation/document-validator.ts index 8c4ae0850..804cb4765 100644 --- a/packages/langium/src/validation/document-validator.ts +++ b/packages/langium/src/validation/document-validator.ts @@ -11,14 +11,14 @@ import type { ParseResult } from '../parser/langium-parser.js'; import type { LangiumCoreServices } from '../services.js'; import type { AstNode, CstNode } from '../syntax-tree.js'; import type { LangiumDocument } from 
'../workspace/documents.js'; -import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry } from './validation-registry.js'; +import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry, ValidationSeverity } from './validation-registry.js'; import { CancellationToken } from '../utils/cancellation.js'; import { findNodeForKeyword, findNodeForProperty } from '../utils/grammar-utils.js'; import { streamAst } from '../utils/ast-utils.js'; import { tokenToRange } from '../utils/cst-utils.js'; import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js'; import { diagnosticData } from './validation-registry.js'; -import type { LexingDiagnostic } from '../parser/token-builder.js'; +import type { LexingDiagnostic, LexingDiagnosticSeverity } from '../parser/token-builder.js'; export interface ValidationOptions { /** @@ -100,7 +100,7 @@ export class DefaultDocumentValidator implements DocumentValidator { protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void { const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[]; for (const lexerDiagnostic of lexerDiagnostics) { - const severity = lexerDiagnostic?.severity ?? 'error'; + const severity = lexerDiagnostic.severity ?? 
'error'; const diagnostic: Diagnostic = { severity: toDiagnosticSeverity(severity), range: { @@ -180,7 +180,7 @@ export class DefaultDocumentValidator implements DocumentValidator { protected async validateAst(rootNode: AstNode, options: ValidationOptions, cancelToken = CancellationToken.None): Promise { const validationItems: Diagnostic[] = []; - const acceptor: ValidationAcceptor = (severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo) => { + const acceptor: ValidationAcceptor = (severity: ValidationSeverity, message: string, info: DiagnosticInfo) => { validationItems.push(this.toDiagnostic(severity, message, info)); }; @@ -194,7 +194,7 @@ export class DefaultDocumentValidator implements DocumentValidator { return validationItems; } - protected toDiagnostic(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo): Diagnostic { + protected toDiagnostic(severity: ValidationSeverity, message: string, info: DiagnosticInfo): Diagnostic { return { message, range: getDiagnosticRange(info), @@ -233,7 +233,7 @@ export function getDiagnosticRange(info: DiagnosticInfo(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo) => void +export type ValidationSeverity = 'error' | 'warning' | 'info' | 'hint'; + +export type ValidationAcceptor = (severity: ValidationSeverity, message: string, info: DiagnosticInfo) => void export type ValidationCheck = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise; diff --git a/packages/langium/test/parser/indentation-aware.test.ts b/packages/langium/test/parser/indentation-aware.test.ts index bf9683dee..558235570 100644 --- a/packages/langium/test/parser/indentation-aware.test.ts +++ b/packages/langium/test/parser/indentation-aware.test.ts @@ -11,7 +11,7 @@ import { EmptyFileSystem, IndentationAwareLexer, IndentationAwareTokenBuilder } import { createLangiumGrammarServices, createServicesForGrammar } from 
'langium/grammar'; import type { LangiumServices, PartialLangiumServices } from 'langium/lsp'; import { expandToString } from 'langium/generate'; -import { parseHelper } from 'langium/test'; +import { expectCompletion, parseHelper } from 'langium/test'; import type { IMultiModeLexerDefinition } from 'chevrotain'; const grammarServices = createLangiumGrammarServices(EmptyFileSystem).grammar; @@ -401,6 +401,27 @@ describe('IndentationAware parsing', () => { expect(return2.value).toBe(true); }); + test('should offer correct auto-completion parsing', async () => { + const text = expandToString` + <|>if true: + <|>return true + <|>else: + <|>if false: + <|>return true + <|>return false + <|>return true + `; + + const services = await createIndentationAwareServices(sampleGrammar); + const completion = expectCompletion(services); + await completion({ text, index: 0, expectedItems: ['if', 'return'] }); + await completion({ text, index: 1, expectedItems: ['if', 'return'] }); + await completion({ text, index: 2, expectedItems: ['else'] }); + await completion({ text, index: 3, expectedItems: ['if', 'return'] }); + await completion({ text, index: 4, expectedItems: ['if', 'return'] }); + await completion({ text, index: 5, expectedItems: ['if', 'return'] }); + await completion({ text, index: 6, expectedItems: ['if', 'return'] }); + }); }); type Statement = If | Return; From a4d2cb3fb30ebd3b3cd4f5b2f41f0a1a97594632 Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Fri, 6 Sep 2024 16:52:25 +0200 Subject: [PATCH 3/3] PR feedback - Ensure column-index is 1-based to avoid error - Make properties of optional options also optional - Mark test case as failing --- packages/langium/src/parser/indentation-aware.ts | 2 +- packages/langium/src/parser/lexer.ts | 2 +- packages/langium/test/parser/indentation-aware.test.ts | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/langium/src/parser/indentation-aware.ts b/packages/langium/src/parser/indentation-aware.ts 
index 2ff40dec6..3891f585e 100644 --- a/packages/langium/src/parser/indentation-aware.ts +++ b/packages/langium/src/parser/indentation-aware.ts @@ -315,7 +315,7 @@ export class IndentationAwareTokenBuilder { expect(return2.value).toBe(true); }); - test('should offer correct auto-completion parsing', async () => { + test.fails('should offer correct auto-completion parsing', async () => { const text = expandToString` <|>if true: <|>return true @@ -415,6 +415,7 @@ describe('IndentationAware parsing', () => { const services = await createIndentationAwareServices(sampleGrammar); const completion = expectCompletion(services); await completion({ text, index: 0, expectedItems: ['if', 'return'] }); + // PR 1669: the lines below currently fail as the completion provider may wrongly assume that all whitespace tokens are hidden await completion({ text, index: 1, expectedItems: ['if', 'return'] }); await completion({ text, index: 2, expectedItems: ['else'] }); await completion({ text, index: 3, expectedItems: ['if', 'return'] });