-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.mll
76 lines (67 loc) · 2.22 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
(* The first section of the lexer definition, called the *header*,
is the part that appears below between { and }. It is code
that will simply be copied literally into the generated lexer.ml. *)
{
open Parser
}
(* The second section of the lexer definition defines *identifiers*
that will be used later in the definition. Each identifier is
a *regular expression*. We won't go into details on how regular
expressions work.
Below, we define regular expressions for
- whitespace (nonempty runs of spaces, tabs, and line breaks),
- digits (0 through 9),
- integers (nonempty sequences of digits; a minus sign is never part of the
  literal, it is lexed separately as the MINUS token),
- letters (a through z, and A through Z), and
- identifiers (a letter followed by any number of letters, digits, and
  underscores).
FYI, these aren't exactly the same as the OCaml definitions of integers and
identifiers. *)
(* Nonempty runs of blanks and line breaks. '\r' is included so that input
   with Windows (CRLF) line endings lexes the same as LF-only input instead
   of failing on the carriage return. *)
let white = [' ' '\t' '\r' '\n']+
let digit = ['0'-'9']
(* Unsigned: a leading '-' is never part of the literal. *)
let int = digit+
let letter = ['a'-'z' 'A'-'Z']
(* A letter, then any mix of letters, digits, and underscores. *)
let id = letter (letter | digit | '_')*
(* The final section of the lexer definition defines how to parse a character
stream into a token stream. Each of the rules below has the form
| regexp { action }
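If the input at the current position matches the regular expression [regexp],
the lexer produces the token specified by the [action]. When several rules
match, the one matching the longest prefix of the input wins, and ties go to
the rule listed first; that is why the keywords are listed before [id]. We
won't go into details on how the actions work. *)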
(* [read lexbuf] returns the next token found in [lexbuf], skipping any
   leading whitespace.

   ocamllex selects the rule that matches the longest prefix of the input and
   breaks ties in favor of the rule listed first, so every keyword must stay
   above the [id] rule.

   Raises [Failure] with a descriptive message when the input contains a
   character that starts no token, or an integer literal that
   [int_of_string] rejects as out of range. *)
rule read =
  parse
  (* Whitespace produces no token: just keep reading. *)
  | white    { read lexbuf }

  (* Operators and punctuation. *)
  | "+"      { PLUS }
  | "-"      { MINUS }
  | "*"      { TIMES }
  | "/"      { DIVIDED }
  | "="      { EQUALS }
  | "->"     { ARROW }
  | "("      { LPAREN }
  | ")"      { RPAREN }
  | "{"      { LBRACE }
  | "}"      { RBRACE }
  | ";"      { SEMICOLON }
  | ":"      { COLON }
  | ","      { COMMA }

  (* Keywords. *)
  | "let"    { LET }
  | "letrec" { LETREC }
  | "in"     { IN }
  | "proc"   { PROC }
  | "zero?"  { ISZERO }
  | "if"     { IF }
  | "then"   { THEN }
  | "else"   { ELSE }
  | "set"    { SET }
  | "begin"  { BEGIN }
  | "end"    { END }
  | "newref" { NEWREF }
  | "deref"  { DEREF }
  | "setref" { SETREF }

  (* Type names. *)
  | "int"    { INTTYPE }
  | "bool"   { BOOLTYPE }
  | "unit"   { UNITTYPE }
  | "ref"    { REFTYPE }

  (* Identifiers and integer literals carry their text or value. *)
  | id as name
      { ID name }
  | int as digits
      { match int_of_string digits with
        | n -> INT n
        | exception Failure _ ->
            failwith
              (Printf.sprintf
                 "lexer: integer literal %s at offset %d is out of range"
                 digits (Lexing.lexeme_start lexbuf)) }
  | eof      { EOF }

  (* Any other character starts no token. Report it explicitly rather than
     letting the generated lexer fail with "lexing: empty token". *)
  | _ as c
      { failwith
          (Printf.sprintf "lexer: unexpected character %C at offset %d"
             c (Lexing.lexeme_start lexbuf)) }
(* And that's the end of the lexer definition. *)