// parser.js
// Functions to create an abstract syntax tree from an array of Scheme tokens.
var predicates = require('./predicates.js');
var lexer = require('./lexer.js');
var tokenize = lexer.tokenize;
var print = console.log;
function separate_sexps(tokens) {
    // Split a token array containing several top-level (possibly nested)
    // s-expressions. Return each top-level s-expression as its own Array,
    // collected in a single Array.
    var i;
    var tmp_stack = [];
    var stack_stack = [];
    var stack_depth = 0;
    for (i = 0; i < tokens.length; i++) {
        if (predicates.is_lparen(tokens[i])) { // entering new depth
            stack_depth += 1;
            tmp_stack.push(tokens[i]);
        } else if (predicates.is_rparen(tokens[i])) {
            tmp_stack.push(tokens[i]);
            stack_depth -= 1;
            if (stack_depth === 0) { // leaving a top-level s-expression
                stack_stack.push(tmp_stack);
                tmp_stack = [];
            }
        } else {
            tmp_stack.push(tokens[i]);
        }
    }
    return stack_stack;
}
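// A sketch of the expected behavior (the token strings here are an
// assumption; the exact shapes depend on lexer.js):
//   separate_sexps(tokenize("(+ 1 2) (car x)"));
//   // => [['(', '+', '1', '2', ')'], ['(', 'car', 'x', ')']]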
var parse_statement = function (tokens) {
    // Given a list of tokens representing a single top-level expression,
    // create an AST. Lists become null-terminated Arrays; dotted pairs,
    // backquotes, and comma-escapes are handled below.
    // TODO: - regular quoting with '
    //       - strings
    var ast_stack = {}; // maps nesting depth to the partially built Array
                        // for the s-expression open at that depth
    var stack_depth = 0;
    var i;
    var inside_backquote_sexp = false; // true when in a backquoted s-expression
    var backquoted_depth = 0;
    var inside_escaped_sexp = false; // true when in a comma-escaped s-expression
    var escaped_depth = 0;
    var backquoted_var = false; // true when a bare symbol is backquoted, e.g. `x
    var escaped_var = false; // true when a bare symbol is comma-escaped, e.g. ,x
    for (i = 0; i < tokens.length; i++) {
        // print("inside backquote sexp?", inside_backquote_sexp);
        // print("backquote depth", backquoted_depth);
        // print("inside_escaped_sexp?", inside_escaped_sexp);
        // print("escaped_depth", escaped_depth);
        // print("backquoted_var?", backquoted_var);
        // print("escaped_var?", escaped_var);
        // print("stack_depth", stack_depth);
        // print("token: ", tokens[i]);
        // debugger;
        if (predicates.is_backquote(tokens[i])) {
            // backquoted expression capture: open a 'BACKQUOTE' frame that
            // wraps whatever follows (an s-expression or a bare symbol)
            if (predicates.is_lparen(tokens[i + 1])) {
                inside_backquote_sexp = true;
            } else {
                backquoted_var = true;
            }
            stack_depth += 1;
            backquoted_depth += 1;
            ast_stack[stack_depth] = ['BACKQUOTE'];
        } else if (predicates.is_lparen(tokens[i])) { // entering new depth
            if (inside_backquote_sexp) {
                backquoted_depth += 1;
                if (inside_escaped_sexp) {
                    escaped_depth += 1;
                }
            }
            stack_depth += 1;
            ast_stack[stack_depth] = [];
        } else if (predicates.is_rparen(tokens[i])) { // leaving depth
            if (stack_depth === 0) { // a ')' with no matching '('
                throw new Error('unbalanced paren within: ' + tokens);
            }
            if (stack_depth > 1) {
                if (inside_backquote_sexp) {
                    if (inside_escaped_sexp) {
                        if (escaped_depth === 2) {
                            // closing paren of the comma-escaped s-expression:
                            // null-terminate it and pop it into the 'COMMA' frame
                            ast_stack[stack_depth].push(null);
                            ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                            delete ast_stack[stack_depth];
                            stack_depth -= 1;
                            backquoted_depth -= 1;
                            inside_escaped_sexp = false;
                            escaped_depth = 0;
                            // then pop the completed 'COMMA' frame into its parent
                            ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                            delete ast_stack[stack_depth];
                            stack_depth -= 1;
                            backquoted_depth -= 1;
                        } else {
                            // closing a list nested inside the escaped s-expression
                            ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                            delete ast_stack[stack_depth];
                            backquoted_depth -= 1;
                            escaped_depth -= 1; // undo the matching '(' increment
                            stack_depth -= 1;
                        }
                    } else if (backquoted_depth === 2) {
                        // end of the backquoted s-expression: null-terminate it
                        // and pop it into the 'BACKQUOTE' frame
                        ast_stack[stack_depth].push(null);
                        ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                        delete ast_stack[stack_depth];
                        stack_depth -= 1;
                        inside_backquote_sexp = false;
                        backquoted_depth = 0;
                        // then pop the completed 'BACKQUOTE' frame into its parent
                        ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                        delete ast_stack[stack_depth];
                        stack_depth -= 1;
                    } else {
                        // closing a list nested inside the backquoted s-expression
                        ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                        delete ast_stack[stack_depth];
                        backquoted_depth -= 1;
                        stack_depth -= 1;
                    }
                } else if (predicates.is_dotted_pair(ast_stack[stack_depth])) {
                    // pop a dotted pair into its parent without null termination
                    ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                    delete ast_stack[stack_depth];
                    stack_depth -= 1;
                } else {
                    // ordinary nested list: null-terminate and pop into its parent
                    ast_stack[stack_depth].push(null);
                    ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                    delete ast_stack[stack_depth];
                    stack_depth -= 1;
                }
            } else { // end of the top-level s-expression: null-terminate it
                ast_stack[stack_depth].push(null);
            }
        } else {
            // atoms, commas, and comma-escaped symbols
            if (backquoted_var) {
                // a bare backquoted symbol, e.g. `x: add it to the
                // 'BACKQUOTE' frame and pop the frame into its parent
                ast_stack[stack_depth].push(tokens[i]);
                ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                delete ast_stack[stack_depth];
                stack_depth -= 1;
                backquoted_depth -= 1;
                backquoted_var = false;
            } else if (inside_backquote_sexp) {
                // check for comma-escaped expressions inside a backquote
                if (predicates.is_comma(tokens[i])) {
                    stack_depth += 1;
                    backquoted_depth += 1;
                    ast_stack[stack_depth] = ['COMMA'];
                    if (predicates.is_lparen(tokens[i + 1])) {
                        inside_escaped_sexp = true;
                    } else {
                        escaped_var = true;
                    }
                    escaped_depth += 1;
                } else if (escaped_var) {
                    // a bare comma-escaped symbol, e.g. ,b: add it to the
                    // 'COMMA' frame and pop the frame into its parent
                    ast_stack[stack_depth].push(tokens[i]);
                    ast_stack[stack_depth - 1].push(ast_stack[stack_depth]);
                    delete ast_stack[stack_depth];
                    stack_depth -= 1;
                    backquoted_depth -= 1;
                    escaped_depth -= 1; // undo the increment made at the comma
                    escaped_var = false;
                } else { // an ordinary symbol inside the backquoted expression
                    ast_stack[stack_depth].push(tokens[i]);
                }
            } else {
                ast_stack[stack_depth].push(tokens[i]);
            }
        }
    }
    return ast_stack[stack_depth];
};
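// A sketch of the resulting AST shape, assuming the lexer emits
// single-character paren tokens; lists become null-terminated Arrays:
//   parse_statement(tokenize("(+ 1 (* 2 3))"));
//   // => ['+', '1', ['*', '2', '3', null], null]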
function parse(tokens) {
    // return an Array of ASTs, one per top-level s-expression in tokens
    var ast_list = [];
    var i; // counter
    var separate_statements = separate_sexps(tokens); // one Array per statement
    for (i = 0; i < separate_statements.length; i++) {
        ast_list.push(parse_statement(separate_statements[i]));
    }
    return ast_list;
}
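// For example (again assuming lexer.js token shapes):
//   parse(tokenize("(define x 5) (display x)"));
//   // => [['define', 'x', '5', null], ['display', 'x', null]]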
exports.separate_sexps = separate_sexps;
exports.parse = parse;
////////////////////////////////////////////////////////////////////////////////
// var input = "(defmacro reverse_args (a b c) `(+ ,(c b a) ,b 99 ,a ))";
// var tokens = tokenize(input);
// print(input);
// print(JSON.stringify(parse(tokens)[0]));
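// If uncommented, and assuming the lexer emits '`' and ',' as standalone
// tokens, the demo above should print roughly:
//   ["defmacro","reverse_args",["a","b","c",null],
//    ["BACKQUOTE",["+",["COMMA",["c","b","a",null]],["COMMA","b"],"99",
//     ["COMMA","a"],null]],null]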