From 940fa56dce1758f02c3423c642f5893de997668a Mon Sep 17 00:00:00 2001 From: Mark Sujew Date: Tue, 14 Jan 2025 19:31:15 +0100 Subject: [PATCH 1/3] Improve performance of hidden node parsing --- .../langium/src/parser/cst-node-builder.ts | 66 +++++++++---------- packages/langium/src/parser/langium-parser.ts | 31 ++++++++- packages/langium/src/serializer/hydrator.ts | 13 ++-- packages/langium/src/syntax-tree.ts | 4 +- .../test/parser/langium-parser.test.ts | 27 +++++++- packages/langium/test/utils/cst-utils.test.ts | 31 +++++++++ 6 files changed, 124 insertions(+), 48 deletions(-) diff --git a/packages/langium/src/parser/cst-node-builder.ts b/packages/langium/src/parser/cst-node-builder.ts index 95a744ab6..0fd813d72 100644 --- a/packages/langium/src/parser/cst-node-builder.ts +++ b/packages/langium/src/parser/cst-node-builder.ts @@ -9,7 +9,6 @@ import type { Range } from 'vscode-languageserver-types'; import type { AbstractElement } from '../languages/generated/ast.js'; import type { AstNode, CompositeCstNode, CstNode, LeafCstNode, RootCstNode } from '../syntax-tree.js'; import { Position } from 'vscode-languageserver-types'; -import { isCompositeCstNode } from '../syntax-tree.js'; import { tokenToRange } from '../utils/cst-utils.js'; export class CstNodeBuilder { @@ -17,7 +16,7 @@ export class CstNodeBuilder { private rootNode!: RootCstNodeImpl; private nodeStack: CompositeCstNodeImpl[] = []; - private get current(): CompositeCstNodeImpl { + get current(): CompositeCstNodeImpl { return this.nodeStack[this.nodeStack.length - 1] ?? this.rootNode; } @@ -37,8 +36,8 @@ export class CstNodeBuilder { return compositeNode; } - buildLeafNode(token: IToken, feature: AbstractElement): LeafCstNode { - const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, false); + buildLeafNode(token: IToken, feature?: AbstractElement): LeafCstNode { + const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, !feature); leafNode.grammarSource = feature; leafNode.root = this.rootNode; this.current.content.push(leafNode); @@ -55,6 +54,33 @@ export class CstNodeBuilder { } } + addHiddenNode(token: IToken): void { + const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true); + leafNode.root = this.rootNode; + let current: CompositeCstNode = this.current; + let added = false; + // If we are within a composite node, we add the hidden node to the content + if (current.content.length > 0) { + current.content.push(leafNode); + return; + } + while (current.container) { + const index = current.container.content.indexOf(current); + if (index > 0) { + // Add the hidden node before the current node + current.container.content.splice(index, 0, leafNode); + added = true; + break; + } + current = current.container; + } + // If we arrive at the root node, we add the hidden node at the beginning + // This is the case if the hidden node is the first node in the tree + if (!added) { + this.rootNode.content.unshift(leafNode); + } + } + construct(item: { $type: string | symbol | undefined, $cstNode: CstNode }): void { const current: CstNode = this.current; // The specified item could be a datatype ($type is symbol) or a fragment ($type is undefined) @@ -70,34 +96,6 @@ export class CstNodeBuilder { this.removeNode(node); } } - - addHiddenTokens(hiddenTokens: IToken[]): void { - for (const token of hiddenTokens) { - const hiddenNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true); - hiddenNode.root = this.rootNode; - this.addHiddenToken(this.rootNode, hiddenNode); - } - } - - private addHiddenToken(node: CompositeCstNode, token: LeafCstNode): void { - const { offset: tokenStart, end: tokenEnd } = token; - - for (let i = 0; i < node.content.length; i++) { - const child = node.content[i]; - const { offset: childStart, end: childEnd } = child; - if (isCompositeCstNode(child) && tokenStart > childStart && tokenEnd < childEnd) { - this.addHiddenToken(child, token); - return; - } else if (tokenEnd <= childStart) { - node.content.splice(i, 0, token); - return; - } - } - - // We know that we haven't found a suited position for the token - // So we simply add it to the end of the current node - node.content.push(token); - } } export abstract class AbstractCstNode implements CstNode { @@ -107,7 +105,7 @@ export abstract class AbstractCstNode implements CstNode { abstract get range(): Range; container?: CompositeCstNode; - grammarSource: AbstractElement; + grammarSource?: AbstractElement; root: RootCstNode; private _astNode?: AstNode; @@ -117,7 +115,7 @@ export abstract class AbstractCstNode implements CstNode { } /** @deprecated use `grammarSource` instead. */ - get feature(): AbstractElement { + get feature(): AbstractElement | undefined { return this.grammarSource; } diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index be07feaf2..c68f9ebaf 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -10,7 +10,7 @@ import type { AbstractElement, Action, Assignment, ParserRule } from '../languag import type { Linker } from '../references/linker.js'; import type { LangiumCoreServices } from '../services.js'; import type { AstNode, AstReflection, CompositeCstNode, CstNode } from '../syntax-tree.js'; -import type { Lexer } from './lexer.js'; +import type { Lexer, LexerResult } from './lexer.js'; import type { IParserConfig } from './parser-config.js'; import type { ValueConverter } from './value-converter.js'; import { defaultParserErrorProvider, EmbeddedActionsParser, LLkLookaheadStrategy } from 'chevrotain'; @@ -195,6 +195,7 @@ export class LangiumParser extends AbstractLangiumParser { private readonly converter: ValueConverter; private readonly astReflection: AstReflection; private readonly nodeBuilder = new CstNodeBuilder(); + private lexerResult?: LexerResult; private stack: any[] = []; private assignmentMap = new Map(); @@ -232,15 +233,16 @@ export class LangiumParser extends AbstractLangiumParser { parse(input: string, options: ParserOptions = {}): ParseResult { this.nodeBuilder.buildRootNode(input); - const lexerResult = this.lexer.tokenize(input); + const lexerResult = this.lexerResult = this.lexer.tokenize(input); this.wrapper.input = lexerResult.tokens; const ruleMethod = options.rule ? this.allRules.get(options.rule) : this.mainRule; if (!ruleMethod) { throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.'); } const result = ruleMethod.call(this.wrapper, {}); - this.nodeBuilder.addHiddenTokens(lexerResult.hidden); + this.appendHiddenTokens(lexerResult.hidden); this.unorderedGroups.clear(); + this.lexerResult = undefined; return { value: result, lexerErrors: lexerResult.errors, @@ -273,9 +275,32 @@ export class LangiumParser extends AbstractLangiumParser { }; } + private appendHiddenTokens(tokens: IToken[]): void { + for (const token of tokens) { + this.nodeBuilder.addHiddenNode(token); + } + } + + private getHiddenTokens(token: IToken): IToken[] { + const hiddenTokens = this.lexerResult!.hidden; + if (!hiddenTokens.length) { + return []; + } + const offset = token.startOffset; + for (let i = 0; i < hiddenTokens.length; i++) { + const token = hiddenTokens[i]; + if (token.startOffset > offset) { + return hiddenTokens.splice(0, i); + } + } + return hiddenTokens.splice(0, hiddenTokens.length); + } + consume(idx: number, tokenType: TokenType, feature: AbstractElement): void { const token = this.wrapper.wrapConsume(idx, tokenType); if (!this.isRecording() && this.isValidToken(token)) { + const hiddenTokens = this.getHiddenTokens(token); + this.appendHiddenTokens(hiddenTokens); const leafNode = this.nodeBuilder.buildLeafNode(token, feature); const { assignment, isCrossRef } = this.getAssignment(feature); const current = this.current; diff --git a/packages/langium/src/serializer/hydrator.ts b/packages/langium/src/serializer/hydrator.ts index c18592f36..0f9477c70 100644 --- a/packages/langium/src/serializer/hydrator.ts +++ b/packages/langium/src/serializer/hydrator.ts @@ -296,23 +296,22 @@ export class DefaultHydrator implements Hydrator { return this.lexer.definition[name]; } - protected getGrammarElementId(node: AbstractElement): number | undefined { + protected getGrammarElementId(node: AbstractElement | undefined): number | undefined { + if (!node) { + return undefined; + } if (this.grammarElementIdMap.size === 0) { this.createGrammarElementIdMap(); } return this.grammarElementIdMap.get(node); } - protected getGrammarElement(id: number): AbstractElement { + protected getGrammarElement(id: number): AbstractElement | undefined { if (this.grammarElementIdMap.size === 0) { this.createGrammarElementIdMap(); } const element = this.grammarElementIdMap.getKey(id); - if (element) { - return element; - } else { - throw new Error('Invalid grammar element id: ' + id); - } + return element; } protected createGrammarElementIdMap(): void { diff --git a/packages/langium/src/syntax-tree.ts b/packages/langium/src/syntax-tree.ts index 47a548715..cbdc8bbbd 100644 --- a/packages/langium/src/syntax-tree.ts +++ b/packages/langium/src/syntax-tree.ts @@ -234,9 +234,9 @@ export interface CstNode extends DocumentSegment { /** The root CST node */ readonly root: RootCstNode; /** The grammar element from which this node was parsed */ - readonly grammarSource: AbstractElement; + readonly grammarSource?: AbstractElement; /** @deprecated use `grammarSource` instead. */ - readonly feature: AbstractElement; + readonly feature?: AbstractElement; /** The AST node created from this CST node */ readonly astNode: AstNode; /** @deprecated use `astNode` instead. */ diff --git a/packages/langium/test/parser/langium-parser.test.ts b/packages/langium/test/parser/langium-parser.test.ts index 7da85a974..efb6491c7 100644 --- a/packages/langium/test/parser/langium-parser.test.ts +++ b/packages/langium/test/parser/langium-parser.test.ts @@ -4,9 +4,9 @@ * terms of the MIT License, which is available in the project root. ******************************************************************************/ -import type { AstNode, LangiumCoreServices } from 'langium'; +import { EmptyFileSystem, type AstNode, type LangiumCoreServices } from 'langium'; import { describe, expect, test, beforeEach } from 'vitest'; -import { createServicesForGrammar } from 'langium/grammar'; +import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar'; import { parseHelper } from 'langium/test'; describe('Partial parsing', () => { @@ -66,6 +66,29 @@ describe('Partial parsing', () => { }); +describe('hidden node parsing', () => { + + test('finishes in expected time', async () => { + const parser = createLangiumGrammarServices(EmptyFileSystem).grammar.parser.LangiumParser; + let content = 'Rule:'; + // Adding hidden nodes used to cause exponential parsing time behavior + for (let i = 0; i < 2500; i++) { + content += "'a' /* A */ /* B */ /* C */\n"; + } + content += ';'; + const start = Date.now(); + // This roughly takes 100-300 ms on a modern machine + // If it takes longer, the hidden node parsing is likely to be exponential + // On an older version of the parser, this took ~5 seconds + const result = parser.parse(content); + expect(result.lexerErrors).toHaveLength(0); + expect(result.parserErrors).toHaveLength(0); + const end = Date.now(); + expect(end - start).toBeLessThan(1000); + }); + +}); + interface A extends AstNode { name: string } diff --git a/packages/langium/test/utils/cst-utils.test.ts b/packages/langium/test/utils/cst-utils.test.ts index 89b506140..30b3bdf9a 100644 --- a/packages/langium/test/utils/cst-utils.test.ts +++ b/packages/langium/test/utils/cst-utils.test.ts @@ -62,6 +62,37 @@ describe('findLeafNode', () => { } }); +describe('findCommentNode', () => { + test('Finds correct comment with multiple comments before and after', async () => { + const text = expandToString` + Main: value=AB; + terminal AB: + /** A */ + /** B */ + 'A' + /** C */ + /** D */; + `; + const grammar = await parser(text); + const offset = text.indexOf("'A'") + 1; + const leafNode = findLeafNodeAtOffset(grammar.parseResult.value.$cstNode!, offset); + const keyword = leafNode?.astNode; + expect(keyword).toBeDefined(); + const comment = CstUtils.findCommentNode(keyword?.$cstNode, ['ML_COMMENT']); + expect(comment?.text).toBe('/** B */'); + }); + + test('Finds correct comment at the start of the file', async () => { + const text = expandToString` + /** A */ + grammar test + `; + const grammar = await parser(text); + const comment = CstUtils.findCommentNode(grammar.parseResult.value.$cstNode, ['ML_COMMENT']); + expect(comment?.text).toBe('/** A */'); + }); +}); + describe('compareRange', () => { test.each([ // Different lines From faa656e954b284d3db124f6d0211e7d8a48cd683 Mon Sep 17 00:00:00 2001 From: Mark Sujew Date: Wed, 15 Jan 2025 12:16:01 +0100 Subject: [PATCH 2/3] Improve performance even further --- .../langium/src/parser/cst-node-builder.ts | 24 +++++++++++-------- packages/langium/src/parser/langium-parser.ts | 10 ++------ packages/langium/test/utils/cst-utils.test.ts | 4 +++- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/langium/src/parser/cst-node-builder.ts b/packages/langium/src/parser/cst-node-builder.ts index 0fd813d72..1d6902502 100644 --- a/packages/langium/src/parser/cst-node-builder.ts +++ b/packages/langium/src/parser/cst-node-builder.ts @@ -54,30 +54,34 @@ export class CstNodeBuilder { } } - addHiddenNode(token: IToken): void { - const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true); - leafNode.root = this.rootNode; + addHiddenNodes(tokens: IToken[]): void { + const nodes: LeafCstNode[] = []; + for (const token of tokens) { + const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true); + leafNode.root = this.rootNode; + nodes.push(leafNode); + } let current: CompositeCstNode = this.current; let added = false; - // If we are within a composite node, we add the hidden node to the content + // If we are within a composite node, we add the hidden nodes to the content if (current.content.length > 0) { - current.content.push(leafNode); + current.content.push(...nodes); return; } while (current.container) { const index = current.container.content.indexOf(current); if (index > 0) { - // Add the hidden node before the current node - current.container.content.splice(index, 0, leafNode); + // Add the hidden nodes before the current node + current.container.content.splice(index, 0, ...nodes); added = true; break; } current = current.container; } - // If we arrive at the root node, we add the hidden node at the beginning - // This is the case if the hidden node is the first node in the tree + // If we arrive at the root node, we add the hidden nodes at the beginning + // This is the case if the hidden nodes are the first nodes in the tree if (!added) { - this.rootNode.content.unshift(leafNode); + this.rootNode.content.unshift(...nodes); } } diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index c68f9ebaf..d94e50b0e 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -240,7 +240,7 @@ export class LangiumParser extends AbstractLangiumParser { throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.'); } const result = ruleMethod.call(this.wrapper, {}); - this.appendHiddenTokens(lexerResult.hidden); + this.nodeBuilder.addHiddenNodes(lexerResult.hidden); this.unorderedGroups.clear(); this.lexerResult = undefined; return { @@ -275,12 +275,6 @@ export class LangiumParser extends AbstractLangiumParser { }; } - private appendHiddenTokens(tokens: IToken[]): void { - for (const token of tokens) { - this.nodeBuilder.addHiddenNode(token); - } - } - private getHiddenTokens(token: IToken): IToken[] { const hiddenTokens = this.lexerResult!.hidden; if (!hiddenTokens.length) { @@ -300,7 +294,7 @@ export class LangiumParser extends AbstractLangiumParser { const token = this.wrapper.wrapConsume(idx, tokenType); if (!this.isRecording() && this.isValidToken(token)) { const hiddenTokens = this.getHiddenTokens(token); - this.appendHiddenTokens(hiddenTokens); + this.nodeBuilder.addHiddenNodes(hiddenTokens); const leafNode = this.nodeBuilder.buildLeafNode(token, feature); const { assignment, isCrossRef } = this.getAssignment(feature); const current = this.current; diff --git a/packages/langium/test/utils/cst-utils.test.ts b/packages/langium/test/utils/cst-utils.test.ts index 30b3bdf9a..9c232e65d 100644 --- a/packages/langium/test/utils/cst-utils.test.ts +++ b/packages/langium/test/utils/cst-utils.test.ts @@ -85,11 +85,13 @@ describe('findCommentNode', () => { test('Finds correct comment at the start of the file', async () => { const text = expandToString` /** A */ + /** B */ + /** C */ grammar test `; const grammar = await parser(text); const comment = CstUtils.findCommentNode(grammar.parseResult.value.$cstNode, ['ML_COMMENT']); - expect(comment?.text).toBe('/** A */'); + expect(comment?.text).toBe('/** C */'); }); }); From 11d88a17814b25dc574b4b4bb8726dd45c0c0efd Mon Sep 17 00:00:00 2001 From: Mark Sujew Date: Wed, 15 Jan 2025 13:46:10 +0100 Subject: [PATCH 3/3] Rename api --- packages/langium/src/parser/cst-node-builder.ts | 2 ++ packages/langium/src/parser/langium-parser.ts | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/langium/src/parser/cst-node-builder.ts b/packages/langium/src/parser/cst-node-builder.ts index 1d6902502..72c89a160 100644 --- a/packages/langium/src/parser/cst-node-builder.ts +++ b/packages/langium/src/parser/cst-node-builder.ts @@ -68,6 +68,8 @@ export class CstNodeBuilder { current.content.push(...nodes); return; } + // Otherwise we are at a newly created node + // Instead of adding the hidden nodes here, we search for the first parent node with content while (current.container) { const index = current.container.content.indexOf(current); if (index > 0) { diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index d94e50b0e..03a28b207 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -275,7 +275,7 @@ export class LangiumParser extends AbstractLangiumParser { }; } - private getHiddenTokens(token: IToken): IToken[] { + private extractHiddenTokens(token: IToken): IToken[] { const hiddenTokens = this.lexerResult!.hidden; if (!hiddenTokens.length) { return []; @@ -293,7 +293,7 @@ export class LangiumParser extends AbstractLangiumParser { consume(idx: number, tokenType: TokenType, feature: AbstractElement): void { const token = this.wrapper.wrapConsume(idx, tokenType); if (!this.isRecording() && this.isValidToken(token)) { - const hiddenTokens = this.getHiddenTokens(token); + const hiddenTokens = this.extractHiddenTokens(token); this.nodeBuilder.addHiddenNodes(hiddenTokens); const leafNode = this.nodeBuilder.buildLeafNode(token, feature); const { assignment, isCrossRef } = this.getAssignment(feature);