1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.failure;
10 import polemy.lex;
11 import polemy.ast;
12
13 /// Parse a string and return its AST
14 /// Throws: ParseException, LexException, UnexpectedEOF
15
16 AST parseString(S, T...)(S str, T fn_ln_cn)
17 {
18 return parserFromString(str, fn_ln_cn).parse();
19 }
20
21 /// Parse the content of a file and return its AST
22 /// Throws: ParseException, LexException, UnexpectedEOF
23
24 AST parseFile(S, T...)(S filename, T ln_cn)
25 {
26 return parserFromFile(filename, ln_cn).parse();
27 }
28
29 // Named Constructors of Parser
30
31 private auto parserFromLexer(Lexer)(Lexer lex)
32 { return new Parser!Lexer(lex); }
33
34 private auto parserFromString(T...)(T params)
35 { return parserFromLexer(lexerFromString(params)); }
36
37 private auto parserFromFile(T...)(T params)
38 { return parserFromLexer(lexerFromFile(params)); }
39
40 // Parser
41
42 private class Parser(Lexer)
43 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
44 {
45 AST parse()
46 {
47 auto e = Body();
48 if( !lex.empty )
49 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
50 return e;
51 }
52
53 AST Body()
54 {
55 /// Body ::= Declaration
56 /// | TopLevelExpression
57
58 if( closingBracket() )
59 return doNothingExpression();
60
61 auto saved = lex.save;
62 if( auto e = Declaration() )
63 return e;
64 lex = saved;
65 return TopLevelExpression();
66 }
67
68 AST Declaration() // returns null if it is not a declaration
69 {
70 /// Declaration ::=
71 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
72 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
73 /// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
74 /// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
75
76 auto pos = currentPosition();
77 string layer = "";
78 bool layerRiseDecl = false;
79
80 if( tryEat("@") )
81 {
82 layer = "@" ~ eatId("after @", AllowQuoted);
83 if( layer == "@@" )
84 {
85 layer = "@" ~ eatId("after @@", AllowQuoted);
86 layerRiseDecl = true;
87 }
88 else
89 {
90 if( tryEat("(") )
91 return null; // @lay(...) expression, not a declaration
92 }
93 }
94
95 // [TODO] Refactor
96 if( layerRiseDecl )
97 {
98 string kwd = "@" ~ layer;
99 string var = layer;
100
101 auto e = tryEat("(")
102 ? parseLambdaAfterOpenParen(pos) // let var ( ...
103 : (eat("=", "after "~kwd), E(0)); // let var = ...
104 if( moreDeclarationExists() )
105 return new LetExpression(pos, var, "(system)", e, Body());
106 else
107 return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
108 }
109 else
110 {
111 string kwd = layer;
112 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
113 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
114
115 auto varpos = currentPosition();
116 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
117
118 auto e = tryEat("(")
119 ? parseLambdaAfterOpenParen(varpos) // let var ( ...
120 : (eat("=", "after "~kwd), E(0)); // let var = ...
121 if( moreDeclarationExists() )
122 return new LetExpression(pos, var, layer, e, Body());
123 else
124 return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
125 }
126 }
127
128 AST TopLevelExpression()
129 {
130 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
131
132 auto pos = currentPosition();
133 auto e = E(0);
134 if( moreDeclarationExists() )
135 return new LetExpression(pos, "_", "", e, Body());
136 else
137 return e;
138 }
139
140 private bool moreDeclarationExists()
141 {
142 return (tryEat(";") || tryEat("in")) && !closingBracket();
143 }
144
145 private bool closingBracket()
146 {
147 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
148 }
149
150 // [TODO] make this customizable from program
151 private static string[][] operator_perferences = [
152 ["||"],
153 ["&&"],
154 ["!="],
155 ["=="],
156 ["<","<=",">",">="],
157 ["|"],
158 ["^"],
159 ["&"],
160 ["<<", ">>"],
161 ["+","-"],
162 ["~"],
163 ["*","/","%"],
164 ["^^","**"],
165 [".",".?"]
166 ];
167
168 AST E(size_t level)
169 {
170 /// Expression ::= (Binary left-associative operators over) Funcall
171
172 AST rec(AST lhs)
173 {
174 if( closingBracket() )
175 return lhs;
176
177 auto pos = currentPosition();
178 foreach(op; operator_perferences[level])
179 if( tryEat(op) )
180 if( op[0]=='.' )
181 return rec(
182 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, parseId()));
183 else
184 return rec(
185 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
186 return lhs;
187 }
188
189 if( operator_perferences.length <= level )
190 return Funcall();
191 else
192 return rec(E(level+1));
193 }
194
195 AST Funcall()
196 {
197 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
198
199 auto e = BaseExpression();
200 for(;;)
201 if( tryEat("(") )
202 {
203 auto pos = currentPosition();
204 AST[] args;
205 while( !tryEat(")") ) {
206 if( lex.empty )
207 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
208 args ~= E(0);
209 if( !tryEat(",") ) {
210 eat(")", "after function parameters");
211 break;
212 }
213 }
214 e = new FuncallExpression(e.pos, e, args);
215 }
216 else if( tryEat("{") )
217 {
218 e = parseTableSetAfterBrace(e);
219 }
220 else
221 break;
222 return e;
223 }
224
225 AST parseTableSetAfterBrace(AST e)
226 {
227 if( tryEat("}") )
228 return e;
229 auto pos = currentPosition();
230 for(;;)
231 {
232 string key = eatId("for table key", AllowQuoted);
233 eat(":", "after table key");
234 AST val = E(0);
235 e = new FuncallExpression(pos, new VarExpression(pos,".="),
236 e, new StrLiteral(pos,key), val);
237 if( !tryEat(",") )
238 {
239 eat("}", "for the end of table literal");
240 break;
241 }
242 }
243 return e;
244 }
245
246 AST BaseExpression()
247 {
248 if( lex.empty )
249 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
250
251 auto pos = lex.front.pos;
252 if( lex.front.quoted )
253 {
254 scope(exit) lex.popFront;
255 return new StrLiteral(pos, lex.front.str);
256 }
257 if( isNumber(lex.front.str) )
258 {
259 scope(exit) lex.popFront;
260 return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
261 }
262 if( tryEat("@") )
263 {
264 auto lay = "@"~eatId("for layer ID");
265 eat("(", "for layered execution");
266 auto e = Body();
267 eat(")", "after "~lay~"(...");
268 return new LayeredExpression(pos, lay, e);
269 }
270 if( tryEat("(") )
271 {
272 auto e = Body();
273 eat(")", "after parenthesized expression");
274 return e;
275 }
276 if( tryEat("{") )
277 {
278 AST e = new FuncallExpression(pos, new VarExpression(pos,"{}"));
279 return parseTableSetAfterBrace(e);
280 }
281 if( tryEat("if") )
282 {
283 eat("(", "after if");
284 auto cond = E(0);
285 eat(")", "after if condition");
286 auto thenPos = lex.front.pos;
287 eat("{", "after if condition");
288 auto th = Body();
289 eat("}", "after if-then body");
290 auto el = doNothingExpression();
291 auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
292 if( tryEat("else") ) {
293 eat("{", "after else");
294 el = Body();
295 eat("}", "after else body");
296 }
297 return new FuncallExpression(pos,
298 new VarExpression(pos, "if"),
299 cond,
300 new FunLiteral(thenPos, [], th),
301 new FunLiteral(elsePos, [], el)
302 );
303 }
304 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
305 {
306 eat("(", "after fun");
307 return parseLambdaAfterOpenParen(pos);
308 }
309 scope(exit) lex.popFront;
310 return new VarExpression(pos, lex.front.str);
311 }
312
313 AST parseId()
314 {
315 scope(exit) lex.popFront;
316 return new StrLiteral(currentPosition(), lex.front.str);
317 }
318
319 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
320 {
321 Parameter[] params;
322 while( !tryEat(")") )
323 {
324 params ~= parseParam();
325 if( !tryEat(",") ) {
326 eat(")", "after function parameters");
327 break;
328 }
329 }
330 eat("{", "after function parameters");
331 auto funbody = Body();
332 eat("}", "after function body");
333 return new FunLiteral(pos, params, funbody);
334 }
335
336 Parameter parseParam()
337 {
338 string var;
339 string[] lay;
340 while( !closingBracket() && !lex.empty && lex.front.str!="," )
341 {
342 auto pos = currentPosition();
343 string p = eatId("for function parameter", AllowQuoted);
344 if( p == "@" )
345 lay ~= "@" ~ eatId("after @", AllowQuoted);
346 else if( var.empty )
347 var = p;
348 else
349 throw genex!ParseException(pos, "one parameter has two names");
350 }
351 return new Parameter(var, lay);
352 }
353
354 private:
355 Lexer lex;
356 this(Lexer lex) { this.lex = lex; }
357
358 bool isNumber(string s)
359 {
360 return find!(`a<'0' || '9'<a`)(s).empty;
361 }
362
363 void eat(string kwd, lazy string msg)
364 {
365 if( !tryEat(kwd) )
366 if( lex.empty )
367 throw genex!UnexpectedEOF(
368 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
369 else
370 throw genex!ParseException(
371 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
372 }
373
374 bool tryEat(string kwd)
375 {
376 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
377 return false;
378 lex.popFront;
379 return true;
380 }
381
382 enum {AllowQuoted=true, DisallowQuoted=false};
383 string eatId(lazy string msg, bool aq=DisallowQuoted)
384 {
385 if( lex.empty )
386 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
387 if( !aq && lex.front.quoted )
388 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
389 scope(exit) lex.popFront;
390 return lex.front.str;
391 }
392
393 AST doNothingExpression()
394 {
395 return new IntLiteral(currentPosition(), BigInt(178));
396 }
397
398 immutable(LexPosition) currentPosition()
399 {
400 return lex.empty ? null : lex.front.pos;
401 }
402 }
403
404 unittest
405 {
406 mixin EasyAST;
407
408 assert_eq(parseString(`123`), intl(123));
409 assert_eq(parseString(`"foo"`), strl("foo"));
410 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
411 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
412 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
413 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
414 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
415 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
416 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
417 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
418 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
419 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
420 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
421 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
422 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
423 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
424 assert_eq(parseString(`if(1){}else{3}()()`),
425 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
426 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
427 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
428 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
429 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
430 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
431 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
432 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
433
434 assert_eq(parseString(`
435 let x = 100; #comment
436 let y = 200; #comment!!!!!
437 x+y
438 `),
439 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
440 );
441
442 assert_eq(parseString(`
443 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
444 fac(10)
445 `),
446 let("fac", "", fun(["x"],
447 call(var("if"),
448 call(var("<="), var("x"), intl(1)),
449 fun([], intl(1)),
450 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
451 )),
452 call(var("fac"),intl(10))
453 )
454 );
455 }
456
457 unittest
458 {
459 assert_throw!UnexpectedEOF(parseString(`1+`));
460 assert_throw!ParseException(parseString(`1+2}`));
461 assert_throw!UnexpectedEOF(parseString(`let "x"`));
462 assert_throw!UnexpectedEOF(parseString(`var`));
463 assert_throw!ParseException(parseString(`@val x ==`));
464 assert_throw!ParseException(parseString(`if(){1}`));
465 assert_throw!UnexpectedEOF(parseString(`f(`));
466 }
467
468 unittest
469 {
470 mixin EasyAST;
471 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
472 let("foo", "",
473 fun(["x"], call(var("+"), var("x"), intl(1))),
474 var("foo"))
475 );
476
477 assert_eq(parseString(`@@type ( x ) { x }`),
478 let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
479
480 assert_eq(parseString(`{}`), call(var("{}")));
481 assert_eq(parseString(`{foo:1,"bar":2}`),
482 call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
483 assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
484 assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
485 assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
486 }