From b4cf27127f18660a208cbe907be4fa47cf4aa9ab Mon Sep 17 00:00:00 2001
From: Nikolay Kostyurin
Date: Sun, 30 Jun 2019 11:15:10 +0200
Subject: [PATCH] fix(parser): infinite loop when escaping `[\b]` (#31)

With enableEscapeTags: true, trying to write [b]test[\b] sent the lexer
into an infinite loop and crashed the page.

Fixes #23
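
A minimal reproduction sketch of the behaviour this patch targets,
mirroring the new test case below. The import path and the named export
are assumptions for illustration: createLexer is defined in
packages/bbob-parser/src/lexer.js, but how it is exposed to consumers is
outside this diff.

    import { createLexer } from './packages/bbob-parser/src/lexer';

    // Before this change no tokenizer branch consumed a lone backslash
    // while enableEscapeTags was on (BACKSLASH sat in NOT_CHAR_TOKENS),
    // so the scan position never advanced and tokenize() never returned.
    // After this change the backslash is emitted as a plain word token.
    const tokens = createLexer('[b]test[\\b]', { enableEscapeTags: true }).tokenize();
    console.log(tokens); // TAG 'b', WORD 'test', WORD '[', WORD '\', WORD 'b]'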
---
 packages/bbob-parser/src/lexer.js       | 30 ++++++++++++++++---------
 packages/bbob-parser/test/lexer.test.js | 22 +++++++++++++-----
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js
index f110b61d..92c308d7 100644
--- a/packages/bbob-parser/src/lexer.js
+++ b/packages/bbob-parser/src/lexer.js
@@ -49,10 +49,11 @@ function createLexer(buffer, options = {}) {
   const tokens = new Array(Math.floor(buffer.length));
   const openTag = options.openTag || OPEN_BRAKET;
   const closeTag = options.closeTag || CLOSE_BRAKET;
+  const escapeTags = options.enableEscapeTags;

   const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
   const NOT_CHAR_TOKENS = [
-    ...(options.enableEscapeTags ? [BACKSLASH] : []),
+    // ...(options.enableEscapeTags ? [BACKSLASH] : []),
     openTag, SPACE, TAB, N,
   ];
   const WHITESPACES = [SPACE, TAB];
@@ -62,6 +63,8 @@
   const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
   const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
   const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
+  const isEscapableChar = char => (char === openTag || char === closeTag || char === BACKSLASH);
+  const isEscapeChar = char => char === BACKSLASH;

  /**
   * Emits newly created token to subscriber
@@ -158,14 +161,9 @@
     } else if (isWhiteSpace(currChar)) {
       const str = bufferGrabber.grabWhile(isWhiteSpace);
       emitToken(createToken(TYPE_SPACE, str, row, col));
-    } else if (options.enableEscapeTags && currChar === BACKSLASH
-      && (nextChar === openTag || nextChar === closeTag)) {
+    } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
       bufferGrabber.skip(); // skip the \ without emitting anything
-      bufferGrabber.skip(); // skip past the [ or ] as well
-      emitToken(createToken(TYPE_WORD, nextChar, row, col));
-    } else if (options.enableEscapeTags && currChar === BACKSLASH && nextChar === BACKSLASH) {
-      bufferGrabber.skip(); // skip the first \ without emitting anything
-      bufferGrabber.skip(); // skip past the second \ and emit it
+      bufferGrabber.skip(); // skip past the [, ] or \ as well
       emitToken(createToken(TYPE_WORD, nextChar, row, col));
     } else if (currChar === openTag) {
       bufferGrabber.skip(); // skip openTag
@@ -200,9 +198,19 @@

       emitToken(createToken(TYPE_WORD, currChar, row, col));
     } else if (isCharToken(currChar)) {
-      const str = bufferGrabber.grabWhile(isCharToken);
-
-      emitToken(createToken(TYPE_WORD, str, row, col));
+      if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
+        bufferGrabber.skip();
+        emitToken(createToken(TYPE_WORD, currChar, row, col));
+      } else {
+        const str = bufferGrabber.grabWhile((char) => {
+          if (escapeTags) {
+            return isCharToken(char) && !isEscapeChar(char);
+          }
+          return isCharToken(char);
+        });
+
+        emitToken(createToken(TYPE_WORD, str, row, col));
+      }
     }
   };

diff --git a/packages/bbob-parser/test/lexer.test.js b/packages/bbob-parser/test/lexer.test.js
index 3a92cff2..8eb6f1ba 100644
--- a/packages/bbob-parser/test/lexer.test.js
+++ b/packages/bbob-parser/test/lexer.test.js
@@ -11,6 +11,7 @@ const TYPE = {
 };

 const tokenize = input => (createLexer(input).tokenize());
+const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());

 describe('lexer', () => {
   const expectOutput = (output, tokens) => {
@@ -289,11 +290,9 @@ describe('lexer', () => {
   });

   test('escaped tag', () => {
-    const tokenizeEscape = input => (createLexer(input, {
-      enableEscapeTags: true
-    }).tokenize());
     const input = '\\[b\\]test\\[';
     const tokens = tokenizeEscape(input);
+
     const output = [
       [TYPE.WORD, '[', '0', '0'],
       [TYPE.WORD, 'b', '0', '0'],
@@ -306,9 +305,6 @@ describe('lexer', () => {
   });

   test('escaped tag and escaped backslash', () => {
-    const tokenizeEscape = input => (createLexer(input, {
-      enableEscapeTags: true
-    }).tokenize());
     const input = '\\\\\\[b\\\\\\]test\\\\\\[/b\\\\\\]';
     const tokens = tokenizeEscape(input);
     const output = [
@@ -328,6 +324,20 @@ describe('lexer', () => {
     expectOutput(output, tokens);
   });

+  test('bad closed tag with escaped backslash', () => {
+    const input = `[b]test[\\b]`;
+    const tokens = tokenizeEscape(input);
+    const output = [
+      [TYPE.TAG, 'b', '0', '3'],
+      [TYPE.WORD, 'test', '0', '7'],
+      [TYPE.WORD, '[', '0', '8'],
+      [TYPE.WORD, '\\', '0', '9'],
+      [TYPE.WORD, 'b]', '0', '11'],
+    ];
+
+    expectOutput(output, tokens);
+  });
+
   describe('html', () => {
     const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();