From ca2e3fdff08f3180864f5eceeff344e238e1ffd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 16 Jan 2025 15:56:58 +0100 Subject: [PATCH] fix: modernize rest lexer References: https://github.com/orbitalquark/scintillua/issues/76 --- lexers/rest.lua | 72 +++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/lexers/rest.lua b/lexers/rest.lua index 0507f782..f8b8a151 100644 --- a/lexers/rest.lua +++ b/lexers/rest.lua @@ -2,7 +2,7 @@ -- reStructuredText LPeg lexer. local lexer = lexer -local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line +local token, starts_line = lexer.token, lexer.starts_line local P, S = lpeg.P, lpeg.S local lex = lexer.new(...) @@ -19,8 +19,7 @@ local block = '::' * (lexer.newline + -1) * function(input, index) end return #input + 1 end -lex:add_rule('literal_block', token('literal_block', block)) -lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true}) +lex:add_rule('literal_block', lex:tag('literal_block', block)) -- Lists. local option_word = lexer.alnum * (lexer.alnum + '-')^0 @@ -31,7 +30,7 @@ local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF- local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)') local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1 lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) * - starts_line(token(lexer.LIST, + starts_line(lex:tag(lexer.LIST, lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space))) local any_indent = S(' \t')^0 @@ -40,15 +39,12 @@ local prefix = any_indent * '.. ' -- Explicit markup blocks. local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']' -local footnote = token('footnote_block', prefix * footnote_label * lexer.space) +local footnote = lex:tag('footnote_block', prefix * footnote_label * lexer.space) local citation_label = '[' * word * ']' -local citation = token('citation_block', prefix * citation_label * lexer.space) -local link = token('link_block', prefix * '_' * +local citation = lex:tag('citation_block', prefix * citation_label * lexer.space) +local link = lex:tag('link_block', prefix * '_' * (lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space) lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link)) -lex:add_style('footnote_block', lexer.styles.label) -lex:add_style('citation_block', lexer.styles.label) -lex:add_style('link_block', lexer.styles.label) -- Sphinx code block. local indented_block = function(input, index) @@ -61,11 +57,11 @@ local indented_block = function(input, index) end local code_block = prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block -lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block))) -lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true}) +lex:add_rule('code_block', #prefix * lex:tag('code_block', starts_line(code_block))) -- Directives. -local known_directive = token('directive', prefix * word_match{ +local known_directive = lex:tag('directive', prefix * lex:word_match('directive') * '::' * lexer.space) +lex:set_word_list('directive', { -- Admonitions 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning', 'admonition', @@ -86,8 +82,9 @@ local known_directive = token('directive', prefix * word_match{ 'replace', 'unicode', 'date', -- Miscellaneous 'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive' -} * '::' * lexer.space) -local sphinx_directive = token('sphinx_directive', prefix * word_match{ +}) +local sphinx_directive = lex:tag('sphinx_directive', prefix * lex:word_match('sphinx_directive') * '::' * lexer.space) +lex:set_word_list('sphinx_directive', { -- The TOC tree. 'toctree', -- Paragraph-level markup. @@ -97,24 +94,20 @@ local sphinx_directive = token('sphinx_directive', prefix * word_match{ 'highlight', 'literalinclude', -- Miscellaneous 'sectionauthor', 'index', 'only', 'tabularcolumns' -} * '::' * lexer.space) -local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space) +}) +local unknown_directive = lex:tag('unknown_directive', prefix * word * '::' * lexer.space) lex:add_rule('directive', #prefix * starts_line(known_directive + sphinx_directive + unknown_directive)) -lex:add_style('directive', lexer.styles.keyword) -lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true}) -lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true}) -- Substitution definitions. -lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') * +lex:add_rule('substitution', #prefix * lex:tag('substitution', starts_line(prefix * lexer.range('|') * lexer.space^1 * word * '::' * lexer.space))) -lex:add_style('substitution', lexer.styles.variable) -- Comments. local line_comment = lexer.to_eol(prefix) local bprefix = any_indent * '..' local block_comment = bprefix * lexer.newline * indented_block -lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment))) +lex:add_rule('comment', #bprefix * lex:tag(lexer.COMMENT, starts_line(line_comment + block_comment))) -- Section titles (2 or more characters). local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')) @@ -135,35 +128,32 @@ local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c) return pos and index - #adm + pos - 1 or nil end) -- Token needs to be a predefined one in order for folder to work. -lex:add_rule('title', token(lexer.HEADING, overline + underline)) +lex:add_rule('title', lex:tag(lexer.HEADING, overline + underline)) -- Line block. -lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|'))) +lex:add_rule('line_block_char', lex:tag(lexer.OPERATOR, starts_line(any_indent * '|'))) -- Inline markup. -local strong = token(lexer.BOLD, lexer.range('**')) -local em = token(lexer.ITALIC, lexer.range('*')) -local inline_literal = token('inline_literal', lexer.range('``')) +local strong = lex:tag(lexer.BOLD, lexer.range('**')) +local em = lex:tag(lexer.ITALIC, lexer.range('*')) +local inline_literal = lex:tag('inline_literal', lexer.range('``')) local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1 local prefix_link = '_' * lexer.range('`') -local link_ref = token(lexer.LINK, postfix_link + prefix_link) -local role = token('role', ':' * word * ':' * (word * ':')^-1) -local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1 -local footnote_ref = token(lexer.REFERENCE, footnote_label * '_') -local citation_ref = token(lexer.REFERENCE, citation_label * '_') -local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1) -local link = token(lexer.LINK, +local link_ref = lex:tag(lexer.LINK, postfix_link + prefix_link) +local role = lex:tag('role', ':' * word * ':' * (word * ':')^-1) +local interpreted = role^-1 * lex:tag('interpreted', lexer.range('`')) * role^-1 +local footnote_ref = lex:tag(lexer.REFERENCE, footnote_label * '_') +local citation_ref = lex:tag(lexer.REFERENCE, citation_label * '_') +local substitution_ref = lex:tag('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1) +local link = lex:tag(lexer.LINK, lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1) lex:add_rule('inline_markup', (strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref + substitution_ref + link) * -lexer.alnum) -lex:add_style('inline_literal', lexer.styles.embedded) -lex:add_style('role', lexer.styles.class) -lex:add_style('interpreted', lexer.styles.string) -- Other. -lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0)) -lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any)) +lex:add_rule('non_space', lex:tag(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0)) +lex:add_rule('escape', lex:tag(lexer.DEFAULT, '\\' * lexer.any)) -- Section-based folding. local sphinx_levels = { @@ -202,7 +192,7 @@ local bash = lexer.load('bash') local bash_indent_level local start_rule = #(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) * - sphinx_directive * token('bash_begin', P(function(input, index) + sphinx_directive * lex:tag('bash_begin', P(function(input, index) bash_indent_level = #input:match('^([ \t]*)', index) return index end))]]