From 464d594e385b121050092432529c8c07ccf584f7 Mon Sep 17 00:00:00 2001
From: Martin Fleck
Date: Fri, 6 Sep 2024 11:25:02 +0200
Subject: [PATCH] Introduce tokenizing options for full and partial mode

Add a tokenizing mode to the tokenize method:
- Full: we get the full text to tokenize
- Partial: we get only a portion of the text to tokenize

In indentation lexing, we do not auto-complete dedents for partial mode.
---
 packages/langium/src/parser/indentation-aware.ts  | 12 +++++++-----
 packages/langium/src/parser/langium-parser.ts     |  2 +-
 packages/langium/src/parser/lexer.ts              | 10 ++++++++--
 .../langium/test/parser/indentation-aware.test.ts | 12 ++++++++++++
 4 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/packages/langium/src/parser/indentation-aware.ts b/packages/langium/src/parser/indentation-aware.ts
index 531fbc76d..d4c3abf5e 100644
--- a/packages/langium/src/parser/indentation-aware.ts
+++ b/packages/langium/src/parser/indentation-aware.ts
@@ -7,11 +7,11 @@
 import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain';
 import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
 import type { LexingReport, TokenBuilderOptions } from './token-builder.js';
-import type { LexerResult } from './lexer.js';
+import type { LexerResult, TokenizeOptions } from './lexer.js';
 import type { LangiumCoreServices } from '../services.js';
 import { createToken, createTokenInstance, Lexer } from 'chevrotain';
 import { DefaultTokenBuilder } from './token-builder.js';
-import { DefaultLexer, isTokenTypeArray } from './lexer.js';
+import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';
 
 type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];
 
@@ -402,13 +402,15 @@ export class IndentationAwareLexer extends DefaultLexer {
         }
     }
 
-    override tokenize(text: string): LexerResult {
+    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const result = super.tokenize(text);
 
         // consuming all remaining dedents and remove them as they might not be serializable
         const report = result.report as IndentationLexingReport;
-        const remainingDedents = report.remainingDedents;
-        result.tokens.push(...remainingDedents);
+        if (options?.mode === 'full') {
+            // auto-complete document with remaining dedents
+            result.tokens.push(...report.remainingDedents);
+        }
         report.remainingDedents = [];
 
         // remove any "indent-dedent" pair with an empty body as these are typically
diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts
index 44292780b..4d431e3aa 100644
--- a/packages/langium/src/parser/langium-parser.ts
+++ b/packages/langium/src/parser/langium-parser.ts
@@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {
 
     parse(input: string): CompletionParserResult {
         this.resetState();
-        const tokens = this.lexer.tokenize(input);
+        const tokens = this.lexer.tokenize(input, { mode: 'partial' });
         this.tokens = tokens.tokens;
         this.wrapper.input = [...this.tokens];
         this.mainRule.call(this.wrapper, {});
diff --git a/packages/langium/src/parser/lexer.ts b/packages/langium/src/parser/lexer.ts
index a45109e82..bf6c0e299 100644
--- a/packages/langium/src/parser/lexer.ts
+++ b/packages/langium/src/parser/lexer.ts
@@ -25,9 +25,15 @@ export interface LexerResult {
     report?: LexingReport;
 }
 
+export interface TokenizeOptions {
+    mode: 'full' | 'partial';
+}
+
+export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };
+
 export interface Lexer {
     readonly definition: TokenTypeDictionary;
-    tokenize(text: string): LexerResult;
+    tokenize(text: string, options?: TokenizeOptions): LexerResult;
 }
 
 export class DefaultLexer implements Lexer {
@@ -52,7 +58,7 @@ export class DefaultLexer implements Lexer {
         return this.tokenTypes;
     }
 
-    tokenize(text: string): LexerResult {
+    tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const chevrotainResult = this.chevrotainLexer.tokenize(text);
         return {
             tokens: chevrotainResult.tokens,
diff --git a/packages/langium/test/parser/indentation-aware.test.ts b/packages/langium/test/parser/indentation-aware.test.ts
index 1b1f51d7a..bf9683dee 100644
--- a/packages/langium/test/parser/indentation-aware.test.ts
+++ b/packages/langium/test/parser/indentation-aware.test.ts
@@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => {
         expect(dedent.tokenType.name).toBe('DEDENT');
     });
 
+    test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
+        const lexer = await getLexer(sampleGrammar);
+        const { tokens } = lexer.tokenize(expandToString`
+        // single-line comment
+        {
+            name`, { mode: 'partial' });
+        expect(tokens).toHaveLength(3);
+
+        const [/* L_BRAC */, indent, /* id */] = tokens;
+        expect(indent.tokenType.name).toBe('INDENT');
+    });
+
     test('should not return any tokens for empty input', async () => {
         const lexer = await getLexer(sampleGrammar);
         const { tokens } = lexer.tokenize('');