Skip to content

Latest commit

 

History

History
271 lines (211 loc) · 8.1 KB

README.md

File metadata and controls

271 lines (211 loc) · 8.1 KB

spell1

An LL(1) parser generator for Erlang and LFE

Project Logo

This is a basic LL(1) parser generator for use together with Erlang. There are two interfaces for different languages, Erlang and LFE, and others can be added. The generator has been split into separate files for handling the grammar and the reading of grammar files and outputing the resultant module code. The grammar is processed in the file spell1_core.erl and the IO-files for Erlang and LFE are inte spell1.erl and lspell1.lfe. It is easy to add IO-files for other languages.

Using the generated parser

The parser which spell1 generates contains by default two exported functions. They have the same name as the root symbol (see below) and are the default entry into the parser. They have the type:

<root symbol>(Tokens) -> Return.
<root symbol>(Continuation, Tokens) -> Return.

    Types:
    
        Tokens = [Token]
        Continuation = [] | continuation()
        Token = {Type,LINE} | {Type,LINE,Value}
        Return = {ok,Form,RestTokens} | {more,Continuation} |
                 {error,Error,RestTokens}

In a token: Type is one of the terminals; LINE is some form of metadata about the token usually containing the line number; and Value gives a specific value for that terminal, for example the actual number or atom.

The functions returns return any remaining tokens which weren't used in the form, these can then be used to parse the next form. When the functions return {more,Continuation} they don't have enough tokens to parse a form and the function can be called again with the continuation and more tokens to continue parsing. This can be repeated until enough tokens have been received and the form is parsed.

Grammar definition files

The grammar definitions are in files with the default extension .spell1. A grammar file must: list all the terminal and non-terminal symbols; indicate the root symbol; give all the grammar rules; and give any extra code. The default function for parsing has the same name as the root symbol.

Erlang

The syntax of the Erlang grammar file is the same as for yecc but only the following declarations are recognised: Terminals, Nonterminals and Rootsymbol. Other declarations generate a warning and are ignored. Rules take the same format as in yecc.

LFE

The syntax is of the LFE grammar file recognises the following forms:

(terminals terminal-1 terminal-2 ...)
(non-terminals non-terminal-1 non-terminal-2 ...)
(root-symbol root)
(rule non-term (symbol-1 ...) code)

An optional section containing LFE code starts with the declaration lfe-code.

Example grammar files

Here is the grammar file for LFE both as an Erlang version and as an LFE version.

Erlang

Terminals
    symbol number string binary fun '(' ')' '[' ']' '.' '\'' '`' ',' ',@'
    '#(' '#B(' '#M(' '#.' '#\''.

Nonterminals form sexpr list list_tail proper_list .

Rootsymbol form.

form -> sexpr : '$1'.
sexpr -> symbol : value('$1').
sexpr -> number : value('$1').
sexpr -> string : value('$1').
sexpr -> binary : value('$1').
sexpr -> '#\'' : make_fun(value('$1')).
sexpr -> '#.' sexpr : eval_expr(line('$1'), '$2').
sexpr -> '\'' sexpr : [quote,'$2'].
sexpr -> '`' sexpr : [backquote,'$2'].
sexpr -> ',' sexpr : [comma,'$2'].
sexpr -> ',@' sexpr : ['comma-at','$2'].
sexpr -> '(' list ')' : '$2'.
sexpr -> '[' list ']' : '$2'.
sexpr -> '#(' proper_list ')' : list_to_tuple('$2').
sexpr -> '#B(' proper_list ')' :
        make_bin(line('$1'), '$2').
sexpr -> '#M(' proper_list ')' :
        make_map(line('$1'), '$2').
list -> sexpr list_tail : ['$1'|'$2'].
list -> '$empty' : [].
list_tail -> sexpr list_tail : ['$1'|'$2'].
list_tail -> '.' sexpr : '$2'.
list_tail -> '$empty' : [].
proper_list -> sexpr proper_list : ['$1'|'$2'].
proper_list -> '$empty' : [].

%% Extra Erlang code.
Erlang code.

-define(CATCH(E, Error), try E catch _:_ -> Error end).

%% For backwards compatibility
-export([sexpr/1,sexpr/2]).

sexpr(Ts) -> form(Ts).
sexpr(Cont, Ts) -> form(Cont, Ts).

%% make_fun(String) -> FunList.
%%  Convert a fun string to a fun sexpr.
%%    "F/A" -> ['fun', F, A].
%%    "M:F/A" -> ['fun', M, F, A].

make_fun("=:=/2") ->
    ['fun', '=:=', 2];
make_fun(FunStr) ->
    J = string:rchr(FunStr, $/),
    A = list_to_integer(string:substr(FunStr, J + 1)),
    case string:chr(FunStr, $:) of
        0 ->
            F = list_to_atom(string:substr(FunStr, 1, J - 1)),
            ['fun', F, A];
        I ->
            F = list_to_atom(string:substr(FunStr, I + 1, J - I - 1)),
            M = list_to_atom(string:substr(FunStr, 1, I - 1)),
            ['fun', M, F, A]
    end.

%% make_bin(Line, Segments) -> Binary.
%%  Make a binary from the segments.

make_bin(Line, Segs) ->
    ?CATCH(lfe_eval:expr([binary|Segs]),
           return_error(Line, "bad binary")).

%% make_map(Line, Elements) -> Map.
%%  Make a map from the key/value elements.

make_map(Line, Es) ->
    ?CATCH(maps:from_list(pair_list(Es)),
           return_error(Line, "bad map")).

%% pair_list(List) -> [{A,B}].
%%  Generate a list of tuple pairs from the elements. An error if odd
%%  number of elements in list.

pair_list([A,B|L]) -> [{A,B}|pair_list(L)];
pair_list([]) -> [].

%% eval_expr(Line, Expr) -> Val.
%%  Evaluate #. expression.

eval_expr(Line, Expr) ->
    ?CATCH(lfe_eval:expr(Expr),
           return_error(Line, "bad #. expression")).

LFE

(terminals symbol number string binary fun |(| |)| |[| |]| |.| |'| |`| |,| |,@|
           |#(| |#B(| |#M(| |#'| |#.| )

(non-terminals form sexpr list list-tail proper-list )

(root-symbol form)

(rule form (sexpr) $1)
(rule sexpr (symbol) (value $1))
(rule sexpr (number) (value $1))
(rule sexpr (string) (value $1))
(rule sexpr (binary) (value $1))
(rule sexpr (|#'|) (make-fun (value $1)))
(rule sexpr (|#.| sexpr) (eval-expr (line $1) $2))
(rule sexpr (|'| sexpr) `(quote ,$2))
(rule sexpr (|`| sexpr) `(backquote ,$2))
(rule sexpr (|,| sexpr) `(comma ,$2))
(rule sexpr (|,@| sexpr) `(comma-at ,$2))
(rule sexpr (|(| list |)|) $2)
(rule sexpr (|[| list |]|) $2)
(rule sexpr (|#(| proper-list |)|) (list_to_tuple $2))
(rule sexpr (|#B(| proper-list |)|)
      (make-bin (line $1) $2))
(rule sexpr (|#M(| proper-list |)|)
      (make-map (line $1) $2))
(rule list (sexpr list-tail) (cons $1 $2))
(rule list ($empty) ())
(rule list-tail (sexpr list-tail) (cons $1 $2))
(rule list-tail (|.| sexpr) $2)
(rule list-tail ($empty) ())
(rule proper-list (sexpr proper-list) (cons $1 $2))
(rule proper-list ($empty) ())

;; Extra LFE code.
lfe-code

(defmacro CATCH (expr error)
  `(try ,expr
     (catch (`#(,_ ,_ ,_) ,error))))

;; For backwards compatibility
(extend-module (export (sexpr 1) (sexpr 2)))

(defun sexpr (ts) (form ts))
(defun sexpr (cont ts) (form cont ts))

;; make_fun(String) -> FunList.
;;  Convert a fun string to a fun sexpr.
;;    "F/A" -> ['fun', F, A].
;;    "M:F/A" -> ['fun', M, F, A].

(defun make-fun
  (["=:=/2"] `(fun =:= 2))
  ([str]
   (let* ((j (string:rchr str #\/))
          (a (list_to_integer (string:substr str 1 (+ j 1)))))
     (case (string:chr str #\:)
       (0 (let ((f (list_to_atom (string:substr str 1 (- j 1)))))
            `(fun ,f ,a)))
       (i (let ((f (list_to_atom (string:substr str (+ i 1) (- j i 1))))
                (m (list_to_atom (string:substr str 1 (- i 1)))))
            `(fun ,m ,f ,a)))))))

;; make-bin(Line, Segments) -> Binary.
;;  Make a binary from the segments.

(defun make-bin (line segs)
  (CATCH (lfe_eval:expr (cons 'binary segs))
         (return_error line "bad binary")))

;; make-map(Line, Elements) -> Map.
;;  Make a map from the key/value elements.

(defun make-map (line es)
  (CATCH (maps:from_list (pair-list es))
         (return_error line "bad map")))

;; pair_list(List) -> [{A,B}].
;;  Generate a list of tuple pairs from the elements. An error if odd
;;  number of elements in list.

(defun pair-list
  ([`(,a ,b . ,l)] `(#(,a ,b) . ,(pair-list l)))
  ([()] ()))

;; eval-expr(Line, Expr) -> Val.
;;  Evaluate #. expression.

(defun eval-expr (line expr)
  (CATCH (lfe_eval:expr expr)
         (return_error line "bad #. expression")))