From baa8d45a5c47f50381e6022263ec29a029a517d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 16 Jan 2025 15:56:58 +0100 Subject: [PATCH] fix: modernize rest lexer References: https://github.com/orbitalquark/scintillua/issues/76 --- lexers/rest.lua | 72 ++++++++++++++++++++-------------------------- lexers/rpmspec.lua | 10 +++---- 2 files changed, 35 insertions(+), 47 deletions(-) diff --git a/lexers/rest.lua b/lexers/rest.lua index 0507f782..f8b8a151 100644 --- a/lexers/rest.lua +++ b/lexers/rest.lua @@ -2,7 +2,7 @@ -- reStructuredText LPeg lexer. local lexer = lexer -local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line +local token, starts_line = lexer.token, lexer.starts_line local P, S = lpeg.P, lpeg.S local lex = lexer.new(...) @@ -19,8 +19,7 @@ local block = '::' * (lexer.newline + -1) * function(input, index) end return #input + 1 end -lex:add_rule('literal_block', token('literal_block', block)) -lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true}) +lex:add_rule('literal_block', lex:tag('literal_block', block)) -- Lists. local option_word = lexer.alnum * (lexer.alnum + '-')^0 @@ -31,7 +30,7 @@ local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF- local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)') local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1 lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) * - starts_line(token(lexer.LIST, + starts_line(lex:tag(lexer.LIST, lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space))) local any_indent = S(' \t')^0 @@ -40,15 +39,12 @@ local prefix = any_indent * '.. ' -- Explicit markup blocks. local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']' -local footnote = token('footnote_block', prefix * footnote_label * lexer.space) +local footnote = lex:tag('footnote_block', prefix * footnote_label * lexer.space) local citation_label = '[' * word * ']' -local citation = token('citation_block', prefix * citation_label * lexer.space) -local link = token('link_block', prefix * '_' * +local citation = lex:tag('citation_block', prefix * citation_label * lexer.space) +local link = lex:tag('link_block', prefix * '_' * (lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space) lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link)) -lex:add_style('footnote_block', lexer.styles.label) -lex:add_style('citation_block', lexer.styles.label) -lex:add_style('link_block', lexer.styles.label) -- Sphinx code block. local indented_block = function(input, index) @@ -61,11 +57,11 @@ local indented_block = function(input, index) end local code_block = prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block -lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block))) -lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true}) +lex:add_rule('code_block', #prefix * lex:tag('code_block', starts_line(code_block))) -- Directives. -local known_directive = token('directive', prefix * word_match{ +local known_directive = lex:tag('directive', prefix * lex:word_match('directive') * '::' * lexer.space) +lex:set_word_list('directive', { -- Admonitions 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning', 'admonition', @@ -86,8 +82,9 @@ local known_directive = token('directive', prefix * word_match{ 'replace', 'unicode', 'date', -- Miscellaneous 'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive' -} * '::' * lexer.space) -local sphinx_directive = token('sphinx_directive', prefix * word_match{ +}) +local sphinx_directive = lex:tag('sphinx_directive', prefix * lex:word_match('sphinx_directive') * '::' * lexer.space) +lex:set_word_list('sphinx_directive', { -- The TOC tree. 'toctree', -- Paragraph-level markup. @@ -97,24 +94,20 @@ local sphinx_directive = token('sphinx_directive', prefix * word_match{ 'highlight', 'literalinclude', -- Miscellaneous 'sectionauthor', 'index', 'only', 'tabularcolumns' -} * '::' * lexer.space) -local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space) +}) +local unknown_directive = lex:tag('unknown_directive', prefix * word * '::' * lexer.space) lex:add_rule('directive', #prefix * starts_line(known_directive + sphinx_directive + unknown_directive)) -lex:add_style('directive', lexer.styles.keyword) -lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true}) -lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true}) -- Substitution definitions. -lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') * +lex:add_rule('substitution', #prefix * lex:tag('substitution', starts_line(prefix * lexer.range('|') * lexer.space^1 * word * '::' * lexer.space))) -lex:add_style('substitution', lexer.styles.variable) -- Comments. local line_comment = lexer.to_eol(prefix) local bprefix = any_indent * '..' local block_comment = bprefix * lexer.newline * indented_block -lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment))) +lex:add_rule('comment', #bprefix * lex:tag(lexer.COMMENT, starts_line(line_comment + block_comment))) -- Section titles (2 or more characters). local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')) @@ -135,35 +128,32 @@ local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c) return pos and index - #adm + pos - 1 or nil end) -- Token needs to be a predefined one in order for folder to work. -lex:add_rule('title', token(lexer.HEADING, overline + underline)) +lex:add_rule('title', lex:tag(lexer.HEADING, overline + underline)) -- Line block. -lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|'))) +lex:add_rule('line_block_char', lex:tag(lexer.OPERATOR, starts_line(any_indent * '|'))) -- Inline markup. -local strong = token(lexer.BOLD, lexer.range('**')) -local em = token(lexer.ITALIC, lexer.range('*')) -local inline_literal = token('inline_literal', lexer.range('``')) +local strong = lex:tag(lexer.BOLD, lexer.range('**')) +local em = lex:tag(lexer.ITALIC, lexer.range('*')) +local inline_literal = lex:tag('inline_literal', lexer.range('``')) local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1 local prefix_link = '_' * lexer.range('`') -local link_ref = token(lexer.LINK, postfix_link + prefix_link) -local role = token('role', ':' * word * ':' * (word * ':')^-1) -local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1 -local footnote_ref = token(lexer.REFERENCE, footnote_label * '_') -local citation_ref = token(lexer.REFERENCE, citation_label * '_') -local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1) -local link = token(lexer.LINK, +local link_ref = lex:tag(lexer.LINK, postfix_link + prefix_link) +local role = lex:tag('role', ':' * word * ':' * (word * ':')^-1) +local interpreted = role^-1 * lex:tag('interpreted', lexer.range('`')) * role^-1 +local footnote_ref = lex:tag(lexer.REFERENCE, footnote_label * '_') +local citation_ref = lex:tag(lexer.REFERENCE, citation_label * '_') +local substitution_ref = lex:tag('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1) +local link = lex:tag(lexer.LINK, lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1) lex:add_rule('inline_markup', (strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref + substitution_ref + link) * -lexer.alnum) -lex:add_style('inline_literal', lexer.styles.embedded) -lex:add_style('role', lexer.styles.class) -lex:add_style('interpreted', lexer.styles.string) -- Other. -lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0)) -lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any)) +lex:add_rule('non_space', lex:tag(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0)) +lex:add_rule('escape', lex:tag(lexer.DEFAULT, '\\' * lexer.any)) -- Section-based folding. local sphinx_levels = { @@ -202,7 +192,7 @@ local bash = lexer.load('bash') local bash_indent_level local start_rule = #(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) * - sphinx_directive * token('bash_begin', P(function(input, index) + sphinx_directive * lex:tag('bash_begin', P(function(input, index) bash_indent_level = #input:match('^([ \t]*)', index) return index end))]] diff --git a/lexers/rpmspec.lua b/lexers/rpmspec.lua index 6f2f76a6..9b535ee0 100644 --- a/lexers/rpmspec.lua +++ b/lexers/rpmspec.lua @@ -5,9 +5,6 @@ local C, P, R, S = lpeg.C, lpeg.P, lpeg.R, lpeg.S local lex = lexer.new(...) --- Whitespace. -lex:add_rule('whitespace', lex:tag(lexer.WHITESPACE, lexer.space^1)) - -- Comments. lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#'))) @@ -22,7 +19,7 @@ lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"'))) -- Keywords. lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.CONSTANT) + - (P('Patch') + P('Source')) * R('09')^0) + (P('Patch') + P('Source')) * R('09')^0)) lex:set_word_list(lexer.CONSTANT, { 'Prereq', 'Summary', 'Name', 'Version', 'Packager', 'Requires', 'Recommends', 'Suggests', 'Supplements', 'Enhances', 'Icon', 'URL', 'Source', 'Patch', 'Prefix', 'Packager', 'Group', @@ -33,12 +30,13 @@ lex:set_word_list(lexer.CONSTANT, { }) -- Macros -lex:add_rule('command', lex:tag(lexer.FUNCTION, S('%$')^1 * S('{')^0 * lexer.word * S('}')^0 )) +lex:add_rule('command', lex:tag(lexer.FUNCTION, + lexer.range(S('%$')^1 * S('{')^0 * lexer.word, S('}')^0))) -- Constants lex:add_rule('constant', lex:tag(lexer.CONSTANT, lex:word_match(lexer.CONSTANT))) lex:set_word_list(lexer.CONSTANT, { - 'rhel', 'fedora', 'suse_version', 'sle_version', 'x86_64' + 'rhel', 'fedora', 'suse_version', 'sle_version', 'x86_64' }) lexer.property['scintillua.comment'] = '#'