From 464d594e385b121050092432529c8c07ccf584f7 Mon Sep 17 00:00:00 2001
From: Martin Fleck
Date: Fri, 6 Sep 2024 11:25:02 +0200
Subject: [PATCH] Introduce tokenizing options for full and partial mode

Add a tokenizing mode to the tokenize method:
- Full: we get the full text to tokenize
- Partial: we get only a portion of the text to tokenize

In indentation lexing, we do not auto-complete dedents for partial mode.
---
 packages/langium/src/parser/indentation-aware.ts  | 12 +++++++-----
 packages/langium/src/parser/langium-parser.ts     |  2 +-
 packages/langium/src/parser/lexer.ts              | 10 ++++++++--
 .../langium/test/parser/indentation-aware.test.ts | 12 ++++++++++++
 4 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/packages/langium/src/parser/indentation-aware.ts b/packages/langium/src/parser/indentation-aware.ts
index 531fbc76d..d4c3abf5e 100644
--- a/packages/langium/src/parser/indentation-aware.ts
+++ b/packages/langium/src/parser/indentation-aware.ts
@@ -7,11 +7,11 @@
 import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain';
 import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
 import type { LexingReport, TokenBuilderOptions } from './token-builder.js';
-import type { LexerResult } from './lexer.js';
+import type { LexerResult, TokenizeOptions } from './lexer.js';
 import type { LangiumCoreServices } from '../services.js';
 import { createToken, createTokenInstance, Lexer } from 'chevrotain';
 import { DefaultTokenBuilder } from './token-builder.js';
-import { DefaultLexer, isTokenTypeArray } from './lexer.js';
+import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';
 
 type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];
 
@@ -402,13 +402,15 @@ export class IndentationAwareLexer extends DefaultLexer {
         }
     }
 
-    override tokenize(text: string): LexerResult {
+    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const result = super.tokenize(text);
 
         // consuming all remaining dedents and remove them as they might not be serializable
         const report = result.report as IndentationLexingReport;
-        const remainingDedents = report.remainingDedents;
-        result.tokens.push(...remainingDedents);
+        if (options?.mode === 'full') {
+            // auto-complete document with remaining dedents
+            result.tokens.push(...report.remainingDedents);
+        }
         report.remainingDedents = [];
 
         // remove any "indent-dedent" pair with an empty body as these are typically
diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts
index 44292780b..4d431e3aa 100644
--- a/packages/langium/src/parser/langium-parser.ts
+++ b/packages/langium/src/parser/langium-parser.ts
@@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {
 
     parse(input: string): CompletionParserResult {
         this.resetState();
-        const tokens = this.lexer.tokenize(input);
+        const tokens = this.lexer.tokenize(input, { mode: 'partial' });
         this.tokens = tokens.tokens;
         this.wrapper.input = [...this.tokens];
         this.mainRule.call(this.wrapper, {});
diff --git a/packages/langium/src/parser/lexer.ts b/packages/langium/src/parser/lexer.ts
index a45109e82..bf6c0e299 100644
--- a/packages/langium/src/parser/lexer.ts
+++ b/packages/langium/src/parser/lexer.ts
@@ -25,9 +25,15 @@ export interface LexerResult {
     report?: LexingReport;
 }
 
+export interface TokenizeOptions {
+    mode: 'full' | 'partial';
+}
+
+export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };
+
 export interface Lexer {
     readonly definition: TokenTypeDictionary;
-    tokenize(text: string): LexerResult;
+    tokenize(text: string, options?: TokenizeOptions): LexerResult;
 }
 
 export class DefaultLexer implements Lexer {
@@ -52,7 +58,7 @@ export class DefaultLexer implements Lexer {
         return this.tokenTypes;
     }
 
-    tokenize(text: string): LexerResult {
+    tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const chevrotainResult = this.chevrotainLexer.tokenize(text);
         return {
             tokens: chevrotainResult.tokens,
diff --git a/packages/langium/test/parser/indentation-aware.test.ts b/packages/langium/test/parser/indentation-aware.test.ts
index 1b1f51d7a..bf9683dee 100644
--- a/packages/langium/test/parser/indentation-aware.test.ts
+++ b/packages/langium/test/parser/indentation-aware.test.ts
@@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => {
         expect(dedent.tokenType.name).toBe('DEDENT');
     });
 
+    test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
+        const lexer = await getLexer(sampleGrammar);
+        const { tokens } = lexer.tokenize(expandToString`
+        // single-line comment
+        {
+            name`, { mode: 'partial' });
+        expect(tokens).toHaveLength(3);
+
+        const [/* L_BRAC */, indent, /* id */] = tokens;
+        expect(indent.tokenType.name).toBe('INDENT');
+    });
+
     test('should not return any tokens for empty input', async () => {
         const lexer = await getLexer(sampleGrammar);
         const { tokens } = lexer.tokenize('');