Skip to content

Commit

Permalink
Upgrade arden grammar to Arden syntax 3.0 R2 specification (#4251)
Browse files Browse the repository at this point in the history
* Upgrade arden grammar to Arden syntax 3.0 R2 specification

* Remove method names

---------

Co-authored-by: Christian Weich <cgw@medexter.com>
  • Loading branch information
Yngli and Christian Weich authored Sep 19, 2024
1 parent a0125e0 commit 4ed9403
Show file tree
Hide file tree
Showing 21 changed files with 2,578 additions and 1,332 deletions.
222 changes: 99 additions & 123 deletions arden/ArdenLexer.g4
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

// Lexer grammar for Arden Syntax MLM source text.
lexer grammar ArdenLexer;
// Grammar-wide default: literals match regardless of letter case.
// NOTE(review): the options block is listed twice (multi-line, then single-line). This looks like the
// removed and added sides of a diff whose +/- markers were lost — keep only one when restoring the file.
options {
caseInsensitive = true;
}

options { caseInsensitive = true; }

// Keywords
// NOTE(review): ACTION appears twice, first with a trailing ':' in its literal and then without it.
// This looks like a removed/added diff pair with the markers stripped — confirm which definition is current.
ABS : 'abs';
ACTION : 'action:';
ACTION : 'action';
ADD : 'add';
AFTER : 'after';
AGGREGATE : 'aggregate';
Expand All @@ -21,15 +16,15 @@ APPLICABILITY : 'applicability';
ARCCOS : 'arccos';
ARCSIN : 'arcsin';
ARCTAN : 'arctan';
// NOTE(review): ARDEN, ARDEN_VERSION and AUTHOR are each listed twice; presumably the first of each pair is
// the removed definition and the second its replacement — confirm against the repository.
ARDEN : 'arden:';
// First form: 'version ' followed by 2, 2.1, 2.2, 2.5 .. 2.10, or 3.
ARDEN_VERSION : 'version ' ('2' ('.' ( '1' | '2' | '5' | '6' | '7' | '8' | '9' | '10'))? | '3');
ARDEN : 'arden';
// Second form: only the exact text 'version 3' (a single space between the words).
ARDEN_VERSION : 'version 3';
ARETRUE : 'aretrue';
ARGUMENT : 'argument';
AS : 'as';
AT : 'at';
ATTIME : 'attime';
ATTRIBUTE : 'attribute';
// Both AUTHOR forms switch the lexer into TextMode; they differ only in whether ':' is part of the literal.
AUTHOR : 'author:' -> mode(TextMode);
AUTHOR : 'author' -> mode(TextMode);
AVERAGE : 'average' | 'avg';
BE : 'be';
BEFORE : 'before';
Expand All @@ -40,20 +35,19 @@ CALL : 'call';
CASE : 'case';
CEILING : 'ceiling';
CHARACTERS : 'characters';
CITATIONS : 'citations:';
CITATIONS : 'citations' -> mode(TextMode);
CLONE : 'clone';
CONCLUDE : 'conclude';
COSINE : 'cos' 'ine'?;
COUNT : 'count';
CRISP : 'crisp';
CURRENTTIME : 'currenttime';
DATA : 'data:';
DATAWC : 'data';
DATE : 'date:';
DATA : 'data';
DATE : 'date';
DAY : 'day' 's'?;
DECREASE : 'decrease';
DEFAULT : 'default';
DEFAULTCO : 'default:' -> mode(TwoCharMode);
DEFAULT_SLOT : DEFAULT COLON -> mode(TwoCharMode);
DEFUZZIFIED : 'defuzzified';
DELAY : 'delay';
DESTINATION : 'destination';
Expand All @@ -63,21 +57,21 @@ EARLIEST : 'earliest';
ELEMENTS : 'elements';
ELSE : 'else';
ELSEIF : 'elseif';
// NOTE(review): END, EVOKE, EXPLANATION and FILENAME are each listed twice, with and without a trailing ':'
// in the literal — presumably removed/added diff pairs; confirm which side is current.
END : 'end:';
END : 'end';
ENDDO : 'enddo';
ENDIF : 'endif';
ENDSWITCH : 'endswitch';
EQUAL : 'equal';
EVENT : 'event';
EVENTTIME : 'eventtime';
EVERY : 'every';
EVOKE : 'evoke:';
EVOKE : 'evoke';
// Matches 'exist' or 'exists'.
EXIST : 'exist' 's'?;
EXP : 'exp';
// EXPLANATION switches to TextMode.
EXPLANATION : 'explanation:' -> mode(TextMode);
EXPLANATION : 'explanation' -> mode(TextMode);
EXTRACT : 'extract';
FALSE : 'false';
// FILENAME switches to the MlmName mode.
FILENAME : 'filename:' -> mode(MlmName);
FILENAME : 'filename' -> mode(MlmName);
FIND : 'find';
FIRST : 'first';
FLOOR : 'floor';
Expand All @@ -94,35 +88,35 @@ IN : 'in';
INCLUDE : 'include';
INCREASE : 'increase';
INDEX : 'index';
// NOTE(review): names defined twice in this stretch look like removed/added diff pairs whose +/- markers
// were lost; confirm against the repository before treating either side as current.
// First pair: 'institution:' enters TextMode and plain 'institution' is the separate INSTITUTIONWC token.
// Second pair: INSTITUTION is the plain word and INSTITUTION_SLOT (INSTITUTION directly followed by COLON) enters TextMode.
INSTITUTION : 'institution:' -> mode(TextMode);
INSTITUTIONWC : 'institution';
INSTITUTION : 'institution';
INSTITUTION_SLOT: INSTITUTION COLON -> mode(TextMode);
INT : 'int';
INTERFACE : 'interface';
INTERVAL : 'interval';
// All four forms of "to be" produce the same IS token.
IS : 'is' | 'are' | 'was' | 'were';
ISTRUE : 'istrue';
IT : 'it' | 'they';
KEYWORDS : 'keywords' -> mode(TextMode);
KNOWLEDGE : 'knowledge:';
// LANGUAGE switches to TwoCharMode in both forms.
LANGUAGE : 'language:' -> mode(TwoCharMode);
KNOWLEDGE : 'knowledge';
LANGUAGE : 'language' -> mode(TwoCharMode);
LAST : 'last';
LATEST : 'latest';
LEAST : 'least';
LEFT : 'left';
LENGTH : 'length';
LESS : 'less';
LET : 'let';
LIBRARY : 'library:';
LIBRARY : 'library';
LINGUISTIC : 'linguistic';
// The second LINKS form drops the ':' and additionally enters TextMode.
LINKS : 'links:';
LINKS : 'links' -> mode(TextMode);
LINK_TYPE : 'url_link' | 'mesh_link' | 'other_link' | 'exe_link';
LIST : 'list';
LOCALIZED : 'localized';
// LOG10 is listed before LOG; the longer literal also wins on length for the input 'log10'.
LOG10 : 'log10';
LOG : 'log';
LOGIC : 'logic:';
LOGIC : 'logic';
LOWERCASE : 'lowercase';
MAINTENANCE : 'maintenance:';
MAINTENANCE : 'maintenance';
MATCHES : 'matches';
// Matches 'max' or 'maximum'.
MAXIMUM : 'max' 'imum'?;
MEDIAN : 'median';
Expand All @@ -131,8 +125,8 @@ MESSAGE : 'message';
// Matches 'min' or 'minimum'.
MINIMUM : 'min' 'imum'?;
// Matches 'minute' or 'minutes'.
MINUTE : 'minute' 's'?;
MLM : 'mlm';
// NOTE(review): MLMNAME and MLM_SELF are each listed twice — presumably removed/added diff pairs; confirm.
// MLMNAME switches to the MlmName mode; the two forms differ only in the trailing ':' of the literal.
MLMNAME : 'mlmname:' -> mode(MlmName);
// Matches 'mlm_self' or 'mlm-self'; the two MLM_SELF lines differ only in spacing.
MLM_SELF : 'mlm' [_-]'self';
MLMNAME : 'mlmname' -> mode(MlmName);
MLM_SELF : 'mlm'[_-]'self';
// Matches 'month' or 'months'.
MONTH : 'month' 's'?;
MOST : 'most';
NAMES : 'names';
Expand All @@ -152,12 +146,12 @@ PATTERN : 'pattern';
// The word 'percent' and the '%' sign produce the same token.
PERCENT : 'percent' | '%';
PRECEDING : 'preceding';
PRESENT : 'present';
// NOTE(review): PRIORITY, PURPOSE and RESOURCES are each listed twice, with and without a trailing ':' in
// the literal — presumably removed/added diff pairs; confirm which side is current.
PRIORITY : 'priority:';
// PURPOSE switches to TextMode in both forms.
PURPOSE : 'purpose:' -> mode(TextMode);
PRIORITY : 'priority';
PURPOSE : 'purpose' -> mode(TextMode);
READ : 'read';
REMOVE : 'remove';
REPLACE : 'replace';
RESOURCES : 'resources:';
RESOURCES : 'resources';
RETURN : 'return';
REVERSE : 'reverse';
RIGHT : 'right';
Expand All @@ -169,7 +163,7 @@ SET : 'set';
// Matches 'sin' or 'sine'.
SINE : 'sin' 'e'?;
SLOPE : 'slope';
SORT : 'sort';
// SPECIALIST switches to TextMode.
// NOTE(review): listed twice (with and without ':'), presumably a removed/added diff pair — confirm.
SPECIALIST : 'specialist:' -> mode(TextMode);
SPECIALIST : 'specialist' -> mode(TextMode);
SQRT : 'sqrt';
STARTING : 'starting';
STDDEV : 'stddev';
Expand All @@ -184,7 +178,7 @@ THAN : 'than';
// 'the' is sent to the hidden channel.
THE : 'the' -> channel(HIDDEN);
THEN : 'then';
TIME : 'time';
// TITLE switches to TextMode.
// NOTE(review): listed twice (with and without ':'), presumably a removed/added diff pair — confirm.
TITLE : 'title:' -> mode(TextMode);
TITLE : 'title' -> mode(TextMode);
TO : 'to';
TODAY : 'today';
TOMORROW : 'tomorrow';
Expand All @@ -193,144 +187,126 @@ TRIM : 'trim';
TRUE : 'true';
TRUNCATE : 'truncate';
// The literal contains exactly one space; other whitespace between the two words does not match this rule.
TRUTHVALUE : 'truth value';
// NOTE(review): TYPE, TYPE_CODE, URGENCY, VALIDATION and VERSION are each listed twice — presumably
// removed/added diff pairs with the +/- markers stripped; confirm which side is current.
TYPE : 'type:';
// Matches 'data_driven' or 'data-driven'; the two TYPE_CODE lines differ only in spacing.
TYPE_CODE : 'data' [_-]'driven';
TYPE : 'type';
TYPE_CODE : 'data'[_-]'driven';
UNTIL : 'until';
UPPERCASE : 'uppercase';
URGENCY : 'urgency:';
URGENCY : 'urgency';
USING : 'using';
VALIDATION : 'validation:';
VALIDATION : 'validation';
VALIDATION_CODE : 'production' | 'research' | 'testing' | 'expired';
VARIABLE : 'variable';
VARIANCE : 'variance';
// VERSION switches to TextMode.
VERSION : 'version:' -> mode(TextMode);
VERSION : 'version' -> mode(TextMode);
// Matches 'week' or 'weeks'.
WEEK : 'week' 's'?;
// Any of the seven English weekday names.
// NOTE(review): from here through the separators the listing interleaves two copies of the same rules (the
// rule header, the closing ';', WHERE..YEAR and the separator block all repeat). This looks like the
// removed and added sides of a reformatting diff with the +/- markers stripped — confirm before reuse.
WEEKDAYLITERAL:
WEEKDAYLITERAL :
'sunday'
| 'monday'
| 'tuesday'
| 'wednesday'
| 'thursday'
| 'friday'
| 'saturday'
;
WHERE : 'where';
WHILE : 'while';
WITH : 'with';
WITHIN : 'within';
WRITE : 'write';
YEAR : 'year' 's'?;

// Seperators
LPAREN : '(';
RPAREN : ')';
LBRACE : '{' -> mode(DataMapping);
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SC : ';';
DSC : ';;';
COLON : ':';
DOT : '.';
COMMA : ',';
;
WHERE : 'where';
WHILE : 'while';
WITH : 'with';
WITHIN : 'within';
WRITE : 'write';
// Matches 'year' or 'years'.
YEAR : 'year' 's'?;

// Separators
LPAREN : '(';
RPAREN : ')';
// An opening brace switches the lexer into the DataMapping mode.
LBRACE : '{' -> mode(DataMapping);
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
// SC is a single semicolon; DSC is the double semicolon.
SC : ';';
DSC : ';;';
COLON : ':';
DOT : '.';
COMMA : ',';

// Operators
// Each comparison operator accepts a symbolic form and a two-letter word form (e.g. '<=' or 'le').
// NOTE(review): the operator list appears twice with identical definitions — presumably the removed and
// added sides of a whitespace-only diff; keep a single copy when restoring the file.
ASSIGN : ':=';
PLUS : '+';
MINUS : '-';
MUL : '*';
DIV : '/';
POWER : '**';
EQ : '=' | 'eq';
LT : '<' | 'lt';
GT : '>' | 'gt';
LE : '<=' | 'le';
GE : '>=' | 'ge';
NE : '<>' | 'ne';
DOR : '||';
ASSIGN : ':=';
PLUS : '+';
MINUS : '-';
MUL : '*';
DIV : '/';
POWER : '**';
EQ : '=' | 'eq';
LT : '<' | 'lt';
GT : '>' | 'gt';
LE : '<=' | 'le';
GE : '>=' | 'ge';
NE : '<>' | 'ne';
DOR : '||';

// Digit constructs
// Numeric literal: digits with an optional fractional part ('12', '12.', '12.5'), or a leading-dot form
// ('.5'), each optionally followed by an exponent.
// NOTE(review): two NUMBER definitions are listed. The first relies on an Exponent fragment that is itself
// optional; the second marks the exponent optional at the use site (`Exponent?`). They appear to be the
// removed and added sides of a diff — pair each with the matching Exponent fragment definition.
NUMBER: Digit+ DOT? Digit* Exponent | DOT Digit+ Exponent;
NUMBER : Digit+ (DOT Digit*)? Exponent? | DOT Digit+ Exponent?;

// Date constructs
// NOTE(review): each token below is listed twice (the time-of-day token also changes name from TIMEOFDAY
// to TIME_OF_DAY). These look like removed/added diff pairs; the first of each pair relies on fragments
// that are optional internally, the second applies `?` at the use site.
// Time of day: two digits, ':', two digits, then optional seconds and optional time zone.
// Only the digit count is checked here; value ranges are not.
TIMEOFDAY: Digit Digit COLON Digit Digit Seconds TimeZone;
TIME_OF_DAY : Digit Digit COLON Digit Digit Seconds? TimeZone?;

// Calendar date: four digits '-' two digits '-' two digits.
ISO_DATE: Digit Digit Digit Digit MINUS Digit Digit MINUS Digit Digit;
ISO_DATE : Digit Digit Digit Digit MINUS Digit Digit MINUS Digit Digit;

// Date and time joined by 't'. Seconds are mandatory here; the fractional part and time zone are optional.
ISO_DATE_TIME:
Digit Digit Digit Digit MINUS Digit Digit MINUS Digit Digit 't' Digit Digit COLON Digit Digit COLON Digit Digit FractionalDigit TimeZone
;
ISO_DATE_TIME : Digit Digit Digit Digit MINUS Digit Digit MINUS Digit Digit 't' Digit Digit COLON Digit Digit COLON Digit Digit FractionalDigit? TimeZone?;

// String constructs
// CITATION: one or more digits, a dot, a single space, then optionally 'support' or 'refute'.
// NOTE(review): CITATION is listed only once, next to the first of two TERM lines; it is presumably on the
// removed side of the diff — confirm whether it still exists in the current grammar.
CITATION: Digit+ DOT ' ' ('support' | 'refute')?;
// TERM: text in single quotes, matched non-greedily up to the next single quote.
TERM : '\'' .*? '\'';

TERM: '\'' .*? '\'';

// STRING: text in double quotes; a doubled quote ("") inside the string stands for an embedded quote character.
STRING: '"' ( '""' | ~'"')* '"';
STRING : '"' ( '""' | ~'"' )* '"';

// Identifier: a letter followed by letters, digits or underscores.
// The first IDENTIFIER form caps the length at 80 characters (StartIdentifier matches 1-10 characters and
// each of the seven RestIdentifier parts matches 0-10). The second form, `Letter ID*`, has no length cap.
IDENTIFIER:
StartIdentifier RestIdentifier RestIdentifier RestIdentifier RestIdentifier RestIdentifier RestIdentifier RestIdentifier
;
IDENTIFIER : Letter ID*;

// Comment
// Block comments ('/* ... */', ended by the first '*/') and line comments ('//' to end of line) go to the
// hidden channel.
// NOTE(review): COMMENT, LINE_COMMENT and WS are each listed twice with identical definitions — presumably
// a whitespace-only removed/added diff pair; keep one copy.
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
COMMENT : '/*' .*? '*/' -> channel(HIDDEN);

LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);
LINE_COMMENT : '//' ~[\r\n]* -> channel(HIDDEN);

// Whitespace
// Runs of spaces, tabs, carriage returns and newlines go to the hidden channel.
WS: [ \t\r\n]+ -> channel(HIDDEN);
WS : [ \t\r\n]+ -> channel(HIDDEN);

// Catch-all: matches any single character. It is defined after the other default-mode token rules, so it
// only wins when no earlier rule matches at that position.
ERROR : .;

// Fragment rules
// NOTE(review): most fragments below are listed twice. In the first of each pair the optionality is built
// into the fragment (`(...)?`); in the second the fragment is mandatory and callers add `?` themselves.
// These look like removed/added diff pairs — a restored grammar must use one consistent set.
// Digit fragments
fragment Digit: [0-9];
fragment Digit : [0-9];

// Fractional part: a dot followed by one or more digits.
fragment FractionalDigit: (DOT Digit+)?;
fragment FractionalDigit : DOT Digit+;

// Exponent: 'e', an optional sign, and one or more digits.
fragment Exponent: ('e' [+-]? Digit+)?;
fragment Exponent : 'e' [+-]? Digit+;

// Date fragments
// Seconds: ':' and two digits, with an optional fractional part.
fragment Seconds: (COLON Digit Digit FractionalDigit)?;
fragment Seconds : COLON Digit Digit FractionalDigit?;

// Time zone: 'z', or a signed offset of two digits, ':', two digits.
fragment TimeZone: 'z'? | (PLUS | MINUS) Digit Digit COLON Digit Digit;
fragment TimeZone : 'z' | (PLUS | MINUS) Digit Digit COLON Digit Digit;

// String fragments
fragment Letter: [a-z];
fragment Letter : [a-z];

// First chunk of a length-limited identifier: one letter plus up to nine ID characters (1-10 characters).
fragment StartIdentifier: Letter (ID (ID (ID (ID (ID (ID (ID (ID (ID?)?)?)?)?)?)?)?)?)?;

// Continuation chunk of a length-limited identifier: zero to ten ID characters.
fragment RestIdentifier: (ID (ID (ID (ID (ID (ID (ID (ID (ID (ID?)?)?)?)?)?)?)?)?)?)?;

// Identifier character: letter, digit or underscore.
fragment ID: Letter | [0-9] | '_';

// MLM-name character: an identifier character, a dot, or a minus sign.
fragment MlmID: ID | DOT | MINUS;
fragment ID : Letter | Digit | '_';

// Lexer modes
// Free-text mode.
// NOTE(review): two designs are interleaved here, presumably the removed and added sides of a diff.
// First TEXT form: one token consumes everything up to and including the first ';;' and returns to DEFAULT_MODE.
// Remaining rules: the ':' is its own token (TEXTMODECOLON), text arrives as TEXT/UTEXT tokens, and the
// ';;' terminator is the separate ADSC token that returns to DEFAULT_MODE.
mode TextMode;
TEXT: .+? ';;' -> mode(DEFAULT_MODE);
TEXTMODECOLON : ':';
// NOTE(review): a non-greedy `.+?` at the end of a rule presumably matches one character per token —
// confirm against ANTLR's non-greedy subrule semantics.
TEXT : .+?;
// A run of characters other than ';', ':' and '^'.
UTEXT : ~[;:^]+;
ADSC : ';;' -> mode(DEFAULT_MODE);

// Mode for the text that follows an opening brace.
// DATA_MAPPING matches one or more characters other than '}', where a backslash followed by any character
// is consumed as a pair, and then returns to DEFAULT_MODE.
// NOTE(review): the rule is listed twice with identical definitions — presumably a whitespace-only
// removed/added diff pair; keep one copy.
mode DataMapping;
DATA_MAPPING: (~'}' | '\\' .)+ -> mode(DEFAULT_MODE);
DATA_MAPPING : (~'}' | '\\' .)+ -> mode(DEFAULT_MODE);

// Mode for reading an MLM name.
// NOTE(review): two versions of this mode are interleaved, presumably the removed and added sides of a diff.
// The first MLMID form is length-limited to 80 characters (MlmIDStart matches 1-10 characters and each of
// the seven MlmIDRest parts matches 0-10). The second form, `Letter MlmID*`, has no length cap.
// Both forms return to DEFAULT_MODE after the name.
mode MlmName;
MLMID:
MlmIDStart MlmIDRest MlmIDRest MlmIDRest MlmIDRest MlmIDRest MlmIDRest MlmIDRest -> mode(DEFAULT_MODE)
;
// NOTE(review): this token name has a lower-case 'l' ("MlMCOLON"), unlike the other token names; renaming
// it would also require updating any parser rule that references it.
MlMCOLON : ':';
MLMID : Letter MlmID* -> mode(DEFAULT_MODE);
// Whitespace inside this mode goes to the hidden channel.
WS_ID : [ \t\r\n]+ -> channel(HIDDEN);

WS_ID: [ \t\r\n]+ -> channel(HIDDEN);

// First chunk of a length-limited MLM name: one letter plus up to nine MlmID characters.
fragment MlmIDStart:
Letter (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID?)?)?)?)?)?)?)?)?)?
;
// Continuation chunk of a length-limited MLM name: zero to ten MlmID characters.
fragment MlmIDRest:
(MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID (MlmID?)?)?)?)?)?)?)?)?)?)?
;
// MLM-name character: an identifier character, a dot, or a minus sign.
fragment MlmID : ID | DOT | MINUS;

// Mode for a short code.
// TWOCHARCODE turns case-insensitivity off for itself, so it matches exactly two lower-case letters
// (Letter is [a-z]), optionally followed by '_' and two upper-case letters (the pattern fits e.g. 'en' or
// 'en_US'), and then returns to DEFAULT_MODE.
// NOTE(review): TWOCHARCODE and WS_TCM are each listed twice — presumably removed/added diff pairs.
// TWOCHARCOLON appears only once, next to the second copies.
mode TwoCharMode;
TWOCHARCODE options {
caseInsensitive = false;
}: Letter Letter ('_' [A-Z] [A-Z])? -> mode(DEFAULT_MODE);

// Whitespace inside this mode goes to the hidden channel.
WS_TCM: WS -> channel(HIDDEN);
TWOCHARCOLON : ':';
TWOCHARCODE options {caseInsensitive = false; }: Letter Letter ('_' [A-Z] [A-Z])? -> mode(DEFAULT_MODE);
WS_TCM : WS -> channel(HIDDEN);
Loading

0 comments on commit 4ed9403

Please sign in to comment.