lexer.py
import ply.lex as lex
import re
import string
import random
from ST import SymbolTable
class LA(object):
    '''Tokenize the input stream.'''
# List of token names
tokens=('while','number','plus','minus','times','divide','equals','lparen',
'logic','logicnot','rparen','comment','keywords','append','string','builtinmethod',
'range','great','rsquare','lsquare','newline','lflower','rflower','less','begin','break',
'else','end','for','if','true','false','return','then_tok','elsif','in','do',
'quotes','dollar','commas','bar','name')
# Regex rules for simple Tokens
t_plus=r'\+'
t_minus=r'-'
t_times=r'\*'
t_divide=r'/'
t_lparen=r'\('
t_rparen=r'\)'
t_equals=r'='
t_append=r'<<'
t_range=r'\.\.'
    # t_ignore is a plain string of characters to skip, not a regex
    t_ignore = ' \t'
t_less=r'\<'
t_keywords=r'true|unless|until|when'
t_lsquare=r'\['
t_rsquare=r'\]'
t_lflower=r'\{'
t_rflower=r'\}'
t_quotes=r'\"'
t_dollar=r'\$'
t_commas=r'\,'
t_bar=r'\|'
t_great=r'\>'
t_name= r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Regex rules with action code (PLY tries these in definition order, before the simple string rules above)
def t_elsif(self,t):
r'elsif'
return t
def t_while(self,t):
r'while'
return t
def t_begin(self,t):
r'begin'
return t
def t_break(self,t):
r'break'
return t
def t_else(self,t):
r'else'
return t
def t_end(self,t):
r'end'
return t
def t_for(self,t):
r'for'
return t
def t_if(self,t):
r'if'
return t
def t_true(self,t):
r'true'
return t
def t_false(self,t):
r'false'
return t
def t_return(self,t):
r'return'
return t
def t_then_tok(self,t):
r'then'
return t
def t_in(self,t):
r'in'
return t
def t_do(self,t):
r'do'
return t
def t_logic(self,t):
r'or|and'
return t
def t_logicnot(self,t):
r'not'
return t
def t_number(self,t):
        r'\d+'  # the docstring is the token's regex
t.value = int(t.value)
return t
def t_builtinmethod(self,t):
        r'Array|Float|Integer|String|at_exit|autoload|binding|caller|catch|chomp!|chomp|chop!|chop|eval|exec|exit!|exit|fail|fork|format|gets|global_variables|gsub!|gsub|iterator\?|lambda|load|local_variables|loop|open|printf|print|proc|putc|puts|raise|rand|readlines|readline|require|select|sleep|split|sprintf|srand|sub!|sub|syscall|system|test|trace_var|trap|untrace_var'  # built-in Ruby methods; bang/question-mark forms are listed before their plain prefixes so the alternation matches them in full
return t
def t_string(self,t):
r'\"[^"]*\"' #docstring representing functions Regex
return t
    # Handle single-line comments by discarding them
def t_comment(self,t):
r'\#[^\n]*'
pass
    # Track line numbers
def t_newline(self,t):
r'\n+'
t.lexer.lineno += len(t.value)
return t
#Error handler
def t_error(self,t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
    # Build the lexer
def build(self,**kwargs):
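        # lex.lex(module=self) collects the t_* rules defined on this object into a lexer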
self.lexer=lex.lex(module=self,**kwargs)
    # Generate the token list for a chunk of source text
    def tokenize(self, data):
        tkns = []
        self.lexer.input(data)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            if tok.type != 'newline':
                tkns.append([tok.type, tok.value])
        return tkns
# Testing
if __name__ == '__main__':
    with open('test.rb', 'r') as source:
        code = source.read()
    l = LA()
    l.build()
    tks = l.tokenize(code)
    st = SymbolTable()
    symbol_table = st.gen_st(tks)
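    # A quick sanity check of the lexer on an inline snippet (illustrative only);
    # given the rules above, "x = 1" should come back as
    # [['name', 'x'], ['equals', '='], ['number', 1]]
    # (the trailing newline token is filtered out by tokenize()).
    print(l.tokenize("x = 1\n"))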