coco_to_antlr.atg

/*-------------------------------------------------------------------------
Coco.ATG -- Attributed Grammar
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
ported to C++ by Csaba Balazs, University of Szeged
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it 
under the terms of the GNU General Public License as published by the 
Free Software Foundation; either version 2, or (at your option) any 
later version.

This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
for more details.

You should have received a copy of the GNU General Public License along 
with this program; if not, write to the Free Software Foundation, Inc., 
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than 
Coco/R itself) does not fall under the GNU General Public License.
-------------------------------------------------------------------------*/
/*-------------------------------------------------------------------------
 compile with:
   Coco Coco.ATG -namespace Coco
-------------------------------------------------------------------------*/

#include <ostream>
#include <sstream>
#include <iostream>
#include <cassert>
#include <list>
#include <unordered_set>
#include <functional>
#include "common.h"
#include "CocoAstMain.h"

COMPILER Coco

// Short aliases for the hash based standard containers.
template<class Key>
using set = std::unordered_set<Key>;
template<class Key, class Value>
using map = std::unordered_map<Key, Value>;

// When false, the text before COMPILER and the text after the grammar name are
// not copied into the generated grammar (see production Coco).
bool convert_instrumentation = true;
// Stream that receives the production part of the generated grammar; initially std::wcout.
std::reference_wrapper<std::wostream> output_stream = std::ref(std::wcout);
// buffer these sections to output them *after* the parser for ANTLR
std::wstringstream characters, tokens;

// AST nodes of all parsed productions in source order, handed to generate_antlr_code().
std::list< CocoAST::Production > productions_ast;

// Selects the stream returned by output(); switched by production Coco as it
// moves through the sections of the input grammar.
output_type current_output = output_type::productions;

/**
 * @return the stream that belongs to current_output:
 *         output_stream for productions, the `tokens` buffer for tokens and
 *         the `characters` buffer for character sets.
 *         Any other value falls back to output_stream, so that the function
 *         never runs off its end without returning a reference.
 */
std::wostream& output() {
    switch(current_output) {
        case output_type::productions:
            break;
        case output_type::tokens:
            return tokens;
        case output_type::charsets:
            return characters;
    }
    // productions, and the fallback for values not listed above
    return output_stream.get();
}

/**
 * Copy the range [pos_start:pos_end) from the scanner buffer to output, obeying mode.
 *
 * @param output    stream that receives the text
 * @param pos_start first buffer position to copy
 * @param pos_end   buffer position one past the last one to copy
 * @param mode      copy_mode::copy    - copy the text unchanged
 *                  copy_mode::warn    - copy unchanged, but put a C2A_TODO comment in front
 *                                       if token_object_regex matches somewhere in the text
 *                  copy_mode::replace - replace every match of token_object_regex by the
 *                                       entry of token_object_replace_map that belongs to
 *                                       the first participating group with index >= 2
 *
 * NOTE(review): whether the string returned by GetString() has to be released by the
 * caller is not visible from this file - confirm against the Buffer implementation.
 */
void copy_verbatim(std::wostream& output, int pos_start, int pos_end, copy_mode mode = copy_mode::copy) {
    const wchar_t* attr = scanner->buffer->GetString(pos_start, pos_end);
    switch(mode) {
    case copy_mode::copy:
        output << attr;
        break;

    case copy_mode::warn:
        if(std::regex_search(attr, token_object_regex))
            output << L"/*" C2A_TODO "references to current Coco Token in this code block, adjust for ANTLR: */\n";
        output << attr;
        break;

    case copy_mode::replace: {
        std::wcmatch match;
        // cannot use regex_iterator because it doesn't give us access to rest behind last match (here: attr after loop)
        // cannot use regex_token_iterator because it doesn't tell us index of matched subgroup
        while(std::regex_search(attr, match, token_object_regex)) {
            output << match.prefix();
            bool replaced = false;
            for(size_t i = 2; i < match.size(); ++i) { // all relevant groups
                const auto& sub_match = match[i];
                if(sub_match.matched) {
                    output << token_object_replace_map.at(i);
                    attr = sub_match.second; // rest of the input
                    replaced = true;
                    break;
                }
            }
            if(!replaced) {
                // None of the relevant groups took part in the match. Without moving attr
                // forward the same match would be found again and again.
                if(match.length(0) == 0)
                    break; // an empty match cannot be stepped over, copy the rest as it is
                output << match[0]; // keep the matched text unchanged
                attr = match[0].second;
            }
        }
        output << attr;
        break;
    }
    }
}

// running number that TokenDecl uses to build the names LITERAL_<n> for unnamed tokens
size_t literal_token_index = 0;

// true while the parser is inside text that gets copied verbatim;
// the comment pragmas write nothing while it is set
bool in_verbatim = true;

//TODO: all that information about symbols is probably better served by a real symbol table class

// maps from coco character set names to their uppercase-variant as used for ANTLR
UBiMap<std::wstring, std::wstring> charset_names;
// maps from coco token names to their uppercase-variant as used for ANTLR
UBiMap<std::wstring, std::wstring> token_names;

//maps from coco production rules to their arguments, telling if they are extracted output parameters (see AttrDecl)
std::unordered_map<std::wstring, std::vector<bool>> production_output_arguments;

/**
 * Look up or create the ANTLR name for a Coco name.
 *
 * A new name is built by dropping leading underscores and upper-casing the first
 * character. If the result is already used, "_2", "_3", ... is appended until it is
 * unique. Character set names only have to be unique among the character sets, token
 * names among character sets and tokens. The mapping is stored in charset_names or
 * token_names, so a second call with the same coco_name returns the same result.
 *
 * A name without any character other than '_' gets the prefix "U" in front of the
 * unchanged name instead, because nothing would be left after dropping the underscores.
 *
 * @return the name to be used in ANTLR instead of coco_name; coco_name itself when
 *         in_section is neither charset nor token
 * @param in_section which section the name was defined in, i.e. what type it has.
 *  NOTE: assumes that once we got into tokens section, no more charsets are defined, similarly for token & production
 * @param coco_name the name as written in the Coco grammar
 */
//TODO: name translation may yield keywords of target language (e.g. Char->char)
std::wstring define_name(name_type in_section, const std::wstring& coco_name) {
    // table that stores the names of the requested section
    UBiMap<std::wstring, std::wstring>* names = nullptr;
    switch (in_section) {
        case name_type::charset:
            names = &charset_names;
            break;
        case name_type::token:
            names = &token_names;
            break;
        default:
            break;
    }
    if (names == nullptr)
        return coco_name; // no table for this kind of name, leave it untouched

    if (names->contains_key(coco_name))
        return names->at(coco_name);

    // substr(npos) would throw, so names made of underscores only are treated separately
    const auto first_kept = coco_name.find_first_not_of(L'_');
    std::wstring antlr_name_raw = (first_kept == std::wstring::npos)
                                  ? L"U" + coco_name
                                  : coco_name.substr(first_kept);
    antlr_name_raw[0] = std::toupper((unsigned char) antlr_name_raw[0]);

    const bool check_tokens = (in_section == name_type::token);
    auto is_taken = [&](const std::wstring& candidate) {
        return charset_names.contains_value(candidate)
               || (check_tokens && token_names.contains_value(candidate));
    };

    auto antlr_name = antlr_name_raw;
    for (size_t i = 2; is_taken(antlr_name); ++i) {
        antlr_name = antlr_name_raw + L"_" + std::to_wstring(i);
    }
    names->insert(coco_name, antlr_name);
    return antlr_name;
}


int indent_level = 0;

/**
 * Change the indentation depth and return the matching whitespace.
 *
 * @param increment number of levels to add to indent_level, may be negative or zero
 * @return one tab character per level of the new indent_level
 *
 * A depth below zero trips the assertion; without assertions it is clamped to zero.
 */
std::wstring indent(int increment = 0) {
    const int requested = indent_level + increment;
    indent_level = requested;
    assert(requested >= 0);
    if (requested < 0)
        indent_level = 0;
    std::wstring whitespace;
    whitespace.assign(indent_level, '\t');
    return whitespace;
}


// Character sets for the scanner of this converter, whose input is a Coco/R grammar.
CHARACTERS
    letter    = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".
    digit     = "0123456789".
    cr        = '\r'.
    lf        = '\n'.
    tab       = '\t'.
    // content of string / char literals: everything but the delimiter, backslash and line ends
    stringCh  = ANY - '"' - '\\' - cr - lf.
    charCh    = ANY - '\'' - '\\' - cr - lf.
    printable =  '\u0020' .. '\u007e'.
    // lower case hex digits only
    hex       = "0123456789abcdef".

    // helper sets for the pragmas comment1 and comment2
    other = ANY - '/' - '*'.
    no_lf = ANY - lf.

// Terminal symbols of the Coco/R grammar notation.
TOKENS
    ident     = letter { letter | digit }.
    number    = digit { digit }.
    string    = '"' { stringCh | '\\' printable } '"'.
    // a string that reaches the end of the line without closing quote
    badString = '"' { stringCh | '\\' printable } (cr | lf).
    char      = '\'' ( charCh | '\\' printable { hex } ) '\''.

// Comments are declared as pragmas, so that they have a semantic action which can
// carry them over to the generated grammar.
PRAGMAS
    ddtSym    = '$' { digit | letter }.
    optionSym = '$' letter { letter } '='
                { digit | letter
                | '-' | '.' | ':'
                }.

    // block comment; copied to the current output unless we are inside verbatim text
    // or writing the productions
    comment1 = "/*" {'/' | other | '*' {'*'} other} '*' {'*'} '/'. //TODO: NESTED comments not supported!
        (. if(!in_verbatim && current_output != output_type::productions)
               output() << la->val << '\n';
        .)
    // line comment, handled the same way as comment1
    comment2 = "//" { no_lf } lf.
        (. if(!in_verbatim && current_output != output_type::productions)
               output() << la->val << '\n';
        .)

// characters the scanner of this converter skips between tokens
IGNORE cr + lf + tab

/*-------------------------------------------------------------------------*/

PRODUCTIONS

/**
 * Coco: start symbol, parses a complete Coco/R grammar and writes the ANTLR grammar.
 *
 * The text in front of COMPILER and the text between the grammar name and the first
 * section keyword are copied into the blocks @parser::header and @parser::members,
 * if convert_instrumentation is set. CHARACTERS are written to the buffer `characters`;
 * TOKENS, PRAGMAS, COMMENTS and IGNORE to the buffer `tokens`. The productions are
 * collected in productions_ast and written by generate_antlr_code(); both buffers are
 * appended after them when END was read.
 */
Coco                            (. token_names.insert(L"EOF", L"EOF"); /* special token in both languages */ .)
=
                                (. auto pos_start_includes = la->pos; .)
  { ANY }                       (. auto pos_end_includes = la->pos;
                                   in_verbatim = false;
                                .)
                                
  "COMPILER" ident              (. output() << "grammar " << t->val << ";\n\n";
                                   if(convert_instrumentation) {
                                       output() << "@parser::header {\n";
                                       copy_verbatim(output(), pos_start_includes, pos_end_includes, copy_mode::warn);
                                       output() << "\n}\n\n";
                                   }

                                   auto pos_start_members = la->pos;
                                   in_verbatim = true;
                                .)
  { ANY }                       (. in_verbatim = false;
                                   if(convert_instrumentation) {
                                       output() << "@parser::members {\n";
                                       copy_verbatim(output(), pos_start_members, la->pos, copy_mode::warn);
                                       output() << "\n}\n\n";
                                   }
                                .)

  [ "IGNORECASE"                (. output() << "//" C2A_TODO "IGNORECASE not possible in ANTLR, see <https://github.com/antlr/antlr4/blob/master/doc/case-insensitive-lexing.md>\n"; .)
  ]
  [ "CHARACTERS"                (. current_output = output_type::charsets;
                                   output() << "// CHARACTERS from line " << std::to_wstring(t->line) << '\n';
                                .)
    { SetDecl }
  ]

                                // everything up to PRODUCTIONS goes to the tokens buffer
                                (. current_output = output_type::tokens; .)
  [ "TOKENS"                    (. output() << "// TOKENS from line " << std::to_wstring(t->line) << '\n'; .)
    { TokenDecl<L""> }          (. output() << '\n'; .)
  ]
  [ "PRAGMAS"                   (. output() << "// PRAGMAS from line " << std::to_wstring(t->line) << '\n'; .)
    { TokenDecl<L"HIDDEN"> }    (. output() << '\n'; .)
  ]
  // every COMMENTS declaration becomes a lexer rule of its own, named after its buffer position
  { "COMMENTS"                  (. output() << "// COMMENTS from line " << std::to_wstring(t->line) << '\n';
                                   output() << define_name(name_type::token, L"COMMENT_" + std::to_wstring(t->pos))
                                            << ": (\n";
                                   indent(+1);
                                .)
    "FROM" TokenExpr            (. output() << " .*?\n"; .)
    "TO" TokenExpr
    [ "NESTED"                  (. output() << '\n' << indent()
                                            << "// " C2A_TODO "implement NESTED for this COMMENT\n";
                                .) //TODO: we can implement NESTED with recursive lexer rules
    ]                           (. output() << indent(-1) << ") -> channel(HIDDEN);\n\n"; .)
  }                             (. output() << '\n'; .)
  // NOTE(review): every IGNORE section passes the same Coco name "IGNORE" to define_name,
  // which returns the stored name for a known key - a second IGNORE section would
  // therefore produce a rule with the same name again. Confirm whether that can occur.
  { "IGNORE"                    (. output() << "// IGNORE from line " << std::to_wstring(t->line) << '\n';
                                   output() << define_name(name_type::token, L"IGNORE") << ": (\n"
                                            << indent(+1);
                                .)
    Set                         (. output() << ") -> skip;\n";
                                    indent(-1);
                                 .)
  }
                                (. output() << "//Coco default: ignore blanks\n"
                                            << define_name(name_type::token, L"IGNORE_WHITESPACE")
                                            << ": ' ' -> channel(HIDDEN);\n";
                                .)

  SYNC                          (. current_output = output_type::productions;
                                   output() << "\n\n";
                                .)
  "PRODUCTIONS"                 (. output() << "// PRODUCTIONS from line " << std::to_wstring(t->line) << '\n'; .)
  {                             (. CocoAST::Production node; .)
    Production<node>            (. productions_ast.emplace_back(std::move(node)); .)
  }
                                // all productions are known now: write the parser rules
                                (. generate_antlr_code(output(), scanner->buffer, token_names, productions_ast); .)
  "END" ident                   (. output() << "\n\n" << tokens.str()
                                            << "\n\n" << characters.str();
                                .)
  '.'
  EOF
.


/**
 * Production: parses one grammar rule `name [attributes] [semantic text] = expression .`
 * and stores its parts in node. attr_decl and sem_text are only allocated
 * when the corresponding part is present.
 */
Production<CocoAST::Production& node>
=
    ident
    (. node.name = std::wstring(t->val); .)
    [
      (. node.attr_decl = std::make_unique<CocoAST::AttrDecl>(); .)
      AttrDecl<*node.attr_decl>
    ]
    [
      (. node.sem_text = std::make_unique<CocoAST::SemText>(); .)
      SemText<*node.sem_text>
    ]
    WEAK '='
    Expression<node.expression>
    WEAK '.'
.


/*------------------------------------------------------------------------------------*/

/**
 * SetDecl: one declaration of the CHARACTERS section, `name = Set .`
 * Written as an ANTLR fragment rule, the name is registered via define_name.
 */
SetDecl
=
  ident                         (. output() << "fragment " << define_name(name_type::charset, t->val)
                                            << ":\n" << indent(+1);
                                .)
  '=' Set
  '.'                           (. output() << '\n' << indent() << ";\n\n";
                                   indent(-1);
                                .)
.

/*------------------------------------------------------------------------------------*/

/**
 * Set: character set expression, simple sets joined by '+' (union) or '-' (difference).
 * A union becomes an ANTLR alternative. A difference cannot be translated directly:
 * the subtracted set is written as ~( ... ) behind a C2A_TODO comment for manual rework.
 */
Set
=
  SimSet
  { '+'                         (. output() << '\n' << indent() << "| "; .)
    SimSet
  | '-'                         (. output() << '\n' << indent()
                                            << "//" C2A_TODO "the following subset was substracted from the previous, reformulate for ANTLR"
                                            << '\n' << indent() << "~("; .)
    SimSet                      (. output() << ")\n"; .)
  }
.

/*------------------------------------------------------------------------------------*/

/**
 * SimSet: one operand of a character set expression, written to output().
 *  - ident:      name of a declared character set, replaced by its ANTLR name.
 *                An unknown name is reported with SemErr and written unchanged,
 *                the same way TokenFactor treats it.
 *  - string:     written as ANTLR set [...]. A '-' is taken out of the set and
 *                written as alternative of its own in front of it.
 *  - char, or char .. char: written as literal or as range of literals.
 *  - ANY:        written as '.'
 */
SimSet
=
( ident                         (. if(charset_names.contains_key(t->val)) {
                                       output() << charset_names.at(t->val);
                                   } else {
                                       SemErr(std::wstring(L"Character Class used but not defined: "
                                                           + std::wstring(t->val)).c_str());
                                       output() << t->val;
                                   }
                                .)
|                               (. std::wstring chars; .)
  String<chars>                 (. // remove "-" if in chars, because in ANTLR it denotes character range
                                   auto removed = std::remove(chars.begin(), chars.end(), '-');
                                   const bool had_dash = (removed != chars.end());
                                   chars.erase(removed, chars.end());

                                   if(had_dash && chars.empty()) {
                                       // nothing but dashes: an empty set [] behind it would be invalid
                                       output() << "'-'";
                                   } else {
                                       if(had_dash)
                                           output() << "'-' | ";
                                       output() << '[' << chars << ']';
                                   }
                                .)
|                               (. std::wstring name; .)
  Char<name>                    (. output() << "'" << name << "'"; .)
  [ ".." Char<name>             (. output() << "..'" << name << "'"; .)
  ]
| "ANY"                         (. output() << '.'; .)
)
.

/**
 * Char: parses a character literal and returns its content without the quotes in name.
 *
 * If the literal contains an escape sequence whose character is not listed in
 * coco_and_antlr_escaped_chars, a C2A_TODO comment is written to output().
 *
 * The scan has to look at a backslash at index 0 as well - that is where the token
 * definition of char puts it - so the loop must not stop on a found position of 0.
 */
Char<std::wstring& name>
=
    char                        (. name = std::wstring(t->val);

                                   name = name.substr(1, name.length() -2); // remove ''
                                   // visit every backslash; pos + 2 steps over the escaped character
                                   for (size_t pos = name.find(L'\\');
                                        pos != std::wstring::npos && pos + 1 < name.length();
                                        pos = name.find(L'\\', pos + 2))
                                   {
                                       const wchar_t escaped = name[pos+1];
                                       if (coco_and_antlr_escaped_chars.find(escaped) == std::wstring::npos) {
                                           // some non-common escape char
                                           //FIXME: name does not necessarily go to output()
                                           output() << "/*" C2A_TODO "escaped character, check meaning: */";
                                           break;
                                       }
                                   }
                                .)
.

/**
 * String: parses a string literal and returns its content without the double quotes
 * in name, with single quotes escaped by a backslash for use in an ANTLR literal.
 *
 * If the string contains a backslash, a C2A_TODO comment is written to output().
 * Existing escape sequences are kept as they are; in particular a quote that already
 * has a backslash in front of it does not get a second one.
 */
String<std::wstring& name>
=   string                      (. name = std::wstring(t->val);
                                   name = name.substr(1, name.length() -2); // remove ""
                                   if(name.find(L'\\') != std::wstring::npos)
                                       //FIXME: name does not necessarily go to output()
                                       output() << "/*" C2A_TODO "escaped characters in string, check meaning: */ ";

                                   std::wstring escaped;
                                   escaped.reserve(name.length() + 2);
                                   for(size_t i = 0; i < name.length(); ++i) {
                                       const wchar_t ch = name[i];
                                       if(ch == L'\\' && i + 1 < name.length()) {
                                           // escape sequence from the input: copy both characters untouched
                                           escaped += ch;
                                           escaped += name[++i];
                                       } else if(ch == L'\'') {
                                           escaped += L"\\'";
                                       } else {
                                           escaped += ch;
                                       }
                                   }
                                   name.swap(escaped);
                                .)
.

/*------------------------------------------------------------------------------------*/

/**
 * TokenDecl: one declaration of the TOKENS or PRAGMAS section, written as ANTLR lexer rule.
 *
 * @param channel name of the ANTLR channel the token is sent to. If it is not empty, the
 *                rule body is put in parentheses and followed by "-> channel(...)".
 *
 * A token declared by a literal only has no name in Coco and gets the name
 * LITERAL_<n>. An optional semantic text is appended as action in braces, copied with
 * copy_mode::replace.
 */
TokenDecl<std::wstring channel>
=
  (. CocoAST::Sym sym_node; .)
  Sym<sym_node>
                                (. if(sym_node.literal) {
                                       // raw string or char token, coco has no name for it
                                       output() << "LITERAL_" << std::to_wstring(literal_token_index)
                                              << ":'" << sym_node.name << "'";
                                       ++literal_token_index;
                                   } else {
                                       output() << define_name(name_type::token, sym_node.name) << ':';
                                   }
                                .)
  SYNC                          (. output() << '\n';
                                   indent(+1);
                                   if(! channel.empty())
                                       output() << indent() << "(\n";
                                .)
  ( '=' TokenExpr '.'
  |                             (. output() << " //" C2A_TODO "named token without body from Coco\n"; .)
  )
  [                             (. output() << '\n' << indent(); CocoAST::SemText node; .)
    SemText<node>
    (.  output() << "{ ";
        copy_verbatim(output(), node.pos_start, node.pos_end, copy_mode::replace);
        output() << " }";
    .)
  ]
                                (. if(! channel.empty()) {
                                       output() << '\n' << indent() << ')';
                                       output() << "-> channel(" << channel << ')';
                                   }
                                   // close the rule and go back to the indentation of the section
                                   output() << '\n' << indent() << ";\n\n";
                                   indent(-1);
                                .)
.

/*------------------------------------------------------------------------------------*/

/**
 * AttrDecl: rule for formal attributes of a rule definition. Parses the instrumented language!
 *
 * Accepts both bracket forms, < ... > and <. ... .>. Every parameter is appended to
 * node.attributes as (declaration text, name, is output parameter).
 *
 * NOTE: Coco C++ mandates by-reference arguments for output,
 * ANTLR doesn't allow them & mandates "returns []" clause and an action to assign to the return variable(s)
 *
 * //TODO: only C++ supported yet, Java C# etc have other syntax ("out" parameter)
 * //TODO: parameter names may conflict with rule names
 */
AttrDecl<CocoAST::AttrDecl& node>
=                               (. std::wstring decl, name;
                                   bool is_output_param;
                                .)
//TODO: may be empty <braces>
	(
		'<'
		[
			SingleAttrDecl<decl, name, is_output_param>
	        (. node.attributes.emplace_back(decl, name, is_output_param); .)
			{ ',' SingleAttrDecl<decl, name, is_output_param>
	            (. node.attributes.emplace_back(decl, name, is_output_param); .)
			}
		]
		'>'
	|   "<."
		[
			SingleAttrDecl_alt<decl, name, is_output_param>
	        (. node.attributes.emplace_back(decl, name, is_output_param); .)
			{ ',' SingleAttrDecl_alt<decl, name, is_output_param>
	            (. node.attributes.emplace_back(decl, name, is_output_param); .)
			}
		]
		".>"
	)
.

/**
 * SingleAttrDecl: one formal parameter, `type name`, inside < ... >.
 *
 * @param decl            receives the source text of the type, from its first token up to
 *                        the end position reported by CppTypeSpec
 * @param name            receives the parameter name
 * @param is_output_param set by CppTypeSpec
 */
SingleAttrDecl<std::wstring& decl, std::wstring& name, bool& is_output_param>
=                               (. int pos_start = la->pos,  pos_end;
                                   in_verbatim = true;
                                .)
    CppTypeSpec<pos_end, is_output_param>
                                (. in_verbatim = false;
                                   std::wostringstream decl_stream;
                                   copy_verbatim(decl_stream, pos_start, pos_end);
                                .)
    ident                       (. name = t->val;
                                   decl = decl_stream.str();
                                .)
.

//! copy of SingleAttrDecl, but with Follow-set {, .>} instead of {, >}
//! Used for parameters inside <. ... .>; parameters have the same meaning as in SingleAttrDecl.
SingleAttrDecl_alt<std::wstring& decl, std::wstring& name, bool& is_output_param>
=                               (. int pos_start = la->pos,  pos_end;
                                   in_verbatim = true;
                                .)
    CppTypeSpec<pos_end, is_output_param>
                                (. in_verbatim = false;
                                   std::wostringstream decl_stream;
                                   copy_verbatim(decl_stream, pos_start, pos_end);
                                .)
    ident                       (. name = t->val;
                                   decl = decl_stream.str();
                                .)
.

//NOTE: cpp parameter declaration does not implement multi-word typenames (e.g. signed long) except const sometimes
/**
 * CppTypeSpec: a C++ type: optional const, qualified name, optional template
 * arguments, optional '*', optional const, optional '&'.
 *
 * @param pos_end         receives the position of the token that follows the type without
 *                        its trailing '&', i.e. the '&' is not part of [start, pos_end)
 * @param is_output_param true if the type ends with '&', false otherwise.
 *                        A const reference is reported as output parameter as well.
 *
 * The results of the nested calls for template arguments are discarded.
 */
CppTypeSpec<int& pos_end, bool& is_output_param>
=                               (. int dummy_i; bool dummy_b;
                                   is_output_param = false;
                                .)
	[ "const" ]
	ident { "::" ident }
	[   '<' CppTypeSpec<dummy_i, dummy_b>
		{ ',' CppTypeSpec<dummy_i, dummy_b> } '>'
	]
	[ "*" ]
	[ "const" ]                 (. pos_end = la->pos; .)
	[ "&"                       (. is_output_param = true; .)
	]
.

/*------------------------------------------------------------------------------------*/

/**
 * Expression: alternatives separated by '|'. Every alternative is parsed
 * as Term and appended to node.terms in source order.
 */
Expression<CocoAST::Expression& node>
=
  (. CocoAST::Term first; .)
  Term<first>
  (. node.terms.push_back(std::move(first)); .)
  {
    (. CocoAST::Term next; .)
    WEAK '|'
    Term<next>
    (. node.terms.push_back(std::move(next)); .)
  }
.

/**
 * Term: one alternative, an optional resolver followed by at least one factor,
 * or nothing at all (empty alternative, node stays without factors).
 * An entry is appended to node.factors first and then filled by Factor.
 */
Term<CocoAST::Term& node>
=
(   [
        (. node.resolver = std::make_unique<CocoAST::Resolver>(); .)
        Resolver<*node.resolver>
    ]
    (. node.factors.emplace_back(); .)
    Factor<node.factors.back()>
    {
        (. node.factors.emplace_back(); .)
        Factor<node.factors.back()>
    }
|
)
.

//TODO: comments are output earlier than correct
/**
 * Factor: one element of a term. node_ptr receives a new node whose type depends on
 * the alternative: symbol with optional attributes, expression in ( ), [ ] or { },
 * semantic text, ANY or SYNC.
 * The keyword WEAK in front of a symbol is accepted, but not stored in the node.
 */
Factor<. std::unique_ptr<CocoAST::Factor>& node_ptr .>
=
(
	[ "WEAK" ]
	(. auto node = new CocoAST::Factor_Sym(); node_ptr.reset(node); .)
	Sym<. node->sym .>
	[
		(. node->attribs = std::make_unique<CocoAST::Attribs>(); .)
		Attribs<. *node->attribs .>
	]

| '('                           (. auto node = new CocoAST::Factor_Braced(); node_ptr.reset(node); .)
	Expression<. node->expression .> ')'
| '['                           (. auto node = new CocoAST::Factor_Optional(); node_ptr.reset(node); .)
	Expression<. node->expression .> ']'
| '{'                           (. auto node = new CocoAST::Factor_Iterate(); node_ptr.reset(node); .)
	Expression<. node->expression .> '}'
|   (. auto node = new CocoAST::Factor_SemText(); node_ptr.reset(node); .)
	SemText<. node->semText .>
| "ANY"                         (. node_ptr = std::make_unique<CocoAST::Factor_ANY>(); .)
| "SYNC"                        (. node_ptr = std::make_unique<CocoAST::Factor_SYNC>(); .)
)
.

/*------------------------------------------------------------------------------------*/

/**
 * Resolver: conflict resolver `IF ( condition )`. Only the buffer positions of the
 * condition are stored: pos_start is the first token behind the opening parenthesis,
 * pos_end is set by Condition.
 */
Resolver<CocoAST::Resolver& node>
=
  "IF" "("                       (. node.pos_start = la->pos;
                                    in_verbatim = true;
                                 .)
  Condition<node.pos_end>        (. in_verbatim = false; .)
.

/**
 * Condition: skips tokens up to the closing parenthesis that matches the one already
 * read by the caller; nested parentheses are handled by recursion.
 * end_pos is set to the position of that closing parenthesis plus one. Nested calls
 * write end_pos too, the value of the outermost call is the one that remains.
 * NOTE(review): with a half open range ending at end_pos the closing parenthesis is
 * part of the range - confirm that the consumer expects this.
 */
Condition<int& end_pos>
=
	{ "(" Condition<end_pos> | ANY } ")"
	(. end_pos = t->pos + 1; .)
.

/*------------------------------------------------------------------------------------*/

/**
 * TokenExpr: alternatives of a token definition, written directly to output().
 * Every further alternative starts on a new line with "|" one level left of the
 * current indentation; the indentation level itself is restored afterwards.
 */
TokenExpr
=                               (. output() << indent(); .)
  TokenTerm
  {
    WEAK '|'                    (. output() << '\n' << indent(-1) << "|\t";
                                   indent(+1);
                                .)
    TokenTerm
  }
.

/**
 * TokenTerm: sequence of token factors, one per output line. A CONTEXT clause is
 * written inside a block comment with a C2A_TODO remark, because it is not
 * supported in ANTLR.
 */
TokenTerm
=
  TokenFactor
  {                             (. output() << '\n' << indent(); .)
    TokenFactor
  }
  [ "CONTEXT"                   (. output() << '\n' << indent() << "/*" C2A_TODO "CONTEXT specification not supported in ANTLR\n"; .)
    '(' TokenExpr               (. output() << "*/\n"; .)
    ')'
  ]
.

/**
 * TokenFactor: one element of a token definition, written directly to output().
 *  - literal (string or char): written in single quotes
 *  - identifier: has to be the name of a character set and is replaced by its ANTLR
 *    name. An unknown name is reported with SemErr and written unchanged.
 *  - ( ), [ ], { }: written as ANTLR group ( ), ( )? and ( )*
 */
TokenFactor
=
  (. CocoAST::Sym sym_node; .)
  Sym<sym_node>
                                (. if(sym_node.literal) {
                                       output() << "'" << sym_node.name << "'";
                                   } else {
                                       if (! charset_names.contains_key(sym_node.name)) {
                                           SemErr(std::wstring(L"Character Class used but not defined: "
                                                               + sym_node.name).c_str());
                                           output() << sym_node.name;
                                       } else {
                                           output() << charset_names.at(sym_node.name);
                                       }
                                   }
                                .)

//TODO: only output braces around these 3 subrules when TokenExpr is not a single Sym. see CocoAST::AntlrOptimizer
| '('                           (. output() << "(\n"; indent(+1); .)
    TokenExpr ')'               (. output() << '\n' << indent(-1) << ')'; .)
| '['                           (. output() << "(\n"; indent(+1); .)
    TokenExpr ']'               (. output() << '\n' << indent(-1) << ")?"; .)
| '{'                           (. output() << "(\n"; indent(+1); .)
    TokenExpr '}'               (. output() << '\n' << indent(-1) << ")*"; .)
.

/*------------------------------------------------------------------------------------*/

/**
 * Sym: a symbol, either an identifier or a literal (string or char).
 * node.name receives the identifier, or the literal content as returned by
 * String / Char; node.literal tells the two cases apart.
 */
Sym<CocoAST::Sym& node>
=
( ident                         (. node.name = std::wstring(t->val);
                                   node.literal = false;
                                .)
| String<node.name>             (. node.literal = true; .)
| Char<node.name>               (. node.literal = true; .)
)
.

/*------------------------------------------------------------------------------------*/

/**
 * Attribs: rule for actual attributes of a rule invocation. Parses the instrumented language!
 *
 * Accepts both bracket forms, < ... > and <. ... .>. The source text of every argument,
 * from its first token up to the token behind it, is appended to node.attributes
 * together with the flag false.
 *
 * NOTE: Coco C++ mandates by-reference arguments for output,
 * ANTLR doesn't allow them & mandates "returns []" clause and an action to assign to the return variable(s)
 *
 * //TODO: only C++ supported yet, Java C# etc have other syntax ("out" parameter)
 */
Attribs<CocoAST::Attribs& node>
=
//TODO: may be empty <braces>
	(
		'<'
		//FIXME: comma , in (braced) expression fails
		[
			(. auto pos_start = la->pos; .)
			ANY{ANY}
			(. std::wostringstream expr;
			   copy_verbatim(expr, pos_start, la->pos);
			   node.attributes.emplace_back(expr.str(), false /* will be set in CocoAST::SymbolResolver */ ); .)
			{ ','
				(. auto pos_start = la->pos; .)
				ANY{ANY}
				(. std::wostringstream expr;
				   copy_verbatim(expr, pos_start, la->pos);
				   node.attributes.emplace_back(expr.str(), false /* will be set in CocoAST::SymbolResolver */ ); .)
			}
		]
		'>'
	|   "<."
		[
			(. auto pos_start = la->pos; .)
			ANY{ANY}
			(. std::wostringstream expr;
			   copy_verbatim(expr, pos_start, la->pos);
			   node.attributes.emplace_back(expr.str(), false /* will be set in CocoAST::SymbolResolver */ ); .)
			{ ','
				(. auto pos_start = la->pos; .)
				ANY{ANY}
				(. std::wostringstream expr;
				   copy_verbatim(expr, pos_start, la->pos);
				   node.attributes.emplace_back(expr.str(), false /* will be set in CocoAST::SymbolResolver */ ); .)
			}
		]
		".>"
	)
.

//TODO: variables/anything containing $ allowed in Coco, fails ANTLR
/**
 * SemText: a semantic action. Only the buffer positions of its content are stored
 * in node: pos_start is the first token behind the opening bracket, pos_end the
 * position of the closing bracket.
 * An unterminated string and a further opening bracket inside the text are accepted
 * without error message.
 */
SemText<CocoAST::SemText& node>
=
	"(."
	(. node.pos_start = la->pos;
	   in_verbatim = true;
	.)
	{ ANY | badString | "(." }

	(. in_verbatim = false;
	   node.pos_end = la->pos;
	.)
	".)"
.

END Coco.