Skip to content

Commit

Permalink
fix(parser): infinite loop when escaping [\b] (#31)
Browse files Browse the repository at this point in the history
With enableEscapeTags: true, writing [b]test[\b] caused the page to crash (the lexer entered an infinite loop).
Fixes #23
  • Loading branch information
JiLiZART authored Jun 30, 2019
1 parent 3d5c1f1 commit b4cf271
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 17 deletions.
30 changes: 19 additions & 11 deletions packages/bbob-parser/src/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,11 @@ function createLexer(buffer, options = {}) {
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = options.enableEscapeTags;

const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
...(options.enableEscapeTags ? [BACKSLASH] : []),
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, SPACE, TAB, N,
];
const WHITESPACES = [SPACE, TAB];
Expand All @@ -62,6 +63,8 @@ function createLexer(buffer, options = {}) {
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
const isEscapableChar = char => (char === openTag || char === closeTag || char === BACKSLASH);
const isEscapeChar = char => char === BACKSLASH;

/**
* Emits newly created token to subscriber
Expand Down Expand Up @@ -158,14 +161,9 @@ function createLexer(buffer, options = {}) {
} else if (isWhiteSpace(currChar)) {
const str = bufferGrabber.grabWhile(isWhiteSpace);
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (options.enableEscapeTags && currChar === BACKSLASH
&& (nextChar === openTag || nextChar === closeTag)) {
} else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
bufferGrabber.skip(); // skip the \ without emitting anything
bufferGrabber.skip(); // skip past the [ or ] as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (options.enableEscapeTags && currChar === BACKSLASH && nextChar === BACKSLASH) {
bufferGrabber.skip(); // skip the first \ without emitting anything
bufferGrabber.skip(); // skip past the second \ and emit it
bufferGrabber.skip(); // skip past the [, ] or \ as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (currChar === openTag) {
bufferGrabber.skip(); // skip openTag
Expand Down Expand Up @@ -200,9 +198,19 @@ function createLexer(buffer, options = {}) {

emitToken(createToken(TYPE_WORD, currChar, row, col));
} else if (isCharToken(currChar)) {
const str = bufferGrabber.grabWhile(isCharToken);

emitToken(createToken(TYPE_WORD, str, row, col));
if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
bufferGrabber.skip();
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((char) => {
if (escapeTags) {
return isCharToken(char) && !isEscapeChar(char);
}
return isCharToken(char);
});

emitToken(createToken(TYPE_WORD, str, row, col));
}
}
};

Expand Down
22 changes: 16 additions & 6 deletions packages/bbob-parser/test/lexer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const TYPE = {
};

const tokenize = input => (createLexer(input).tokenize());
const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());

describe('lexer', () => {
const expectOutput = (output, tokens) => {
Expand Down Expand Up @@ -289,11 +290,9 @@ describe('lexer', () => {
});

test('escaped tag', () => {
const tokenizeEscape = input => (createLexer(input, {
enableEscapeTags: true
}).tokenize());
const input = '\\[b\\]test\\[';
const tokens = tokenizeEscape(input);

const output = [
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'b', '0', '0'],
Expand All @@ -306,9 +305,6 @@ describe('lexer', () => {
});

test('escaped tag and escaped backslash', () => {
const tokenizeEscape = input => (createLexer(input, {
enableEscapeTags: true
}).tokenize());
const input = '\\\\\\[b\\\\\\]test\\\\\\[/b\\\\\\]';
const tokens = tokenizeEscape(input);
const output = [
Expand All @@ -328,6 +324,20 @@ describe('lexer', () => {
expectOutput(output, tokens);
});

test('bad closed tag with escaped backslash', () => {
const input = `[b]test[\\b]`;
const tokens = tokenizeEscape(input);
const output = [
[TYPE.TAG, 'b', '0', '3'],
[TYPE.WORD, 'test', '0', '7'],
[TYPE.WORD, '[', '0', '8'],
[TYPE.WORD, '\\', '0', '9'],
[TYPE.WORD, 'b]', '0', '11'],
];

expectOutput(output, tokens);
});

describe('html', () => {
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();

Expand Down

0 comments on commit b4cf271

Please sign in to comment.