1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.failure;
10 import polemy.lex;
11 import polemy.ast;
12 import polemy.layer;
13 import polemy.fresh;
14
15 /// Parse a string and return its AST
16
17 AST parseString(S, T...)(S str, T fn_ln_cn)
18 {
19 return parserFromString(str, fn_ln_cn).parse();
20 }
21
22 /// Parse the content of a file and return its AST
23
24 AST parseFile(S, T...)(S filename, T ln_cn)
25 {
26 return parserFromFile(filename, ln_cn).parse();
27 }
28
29 // Named Constructors of Parser
30
31 private auto parserFromLexer(Lexer)(Lexer lex)
32 { return new Parser!Lexer(lex); }
33
34 private auto parserFromString(T...)(T params)
35 { return parserFromLexer(lexerFromString(params)); }
36
37 private auto parserFromFile(T...)(T params)
38 { return parserFromLexer(lexerFromFile(params)); }
39
40 // Parser
41
42 private class Parser(Lexer)
43 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
44 {
45 AST parse()
46 {
47 auto e = Body();
48 if( !lex.empty )
49 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
50 return e;
51 }
52
53 AST Body()
54 {
55 /// Body ::= Declaration
56 /// | TopLevelExpression
57
58 if( closingBracket() )
59 return doNothingExpression();
60
61 auto saved = lex.save;
62 if( auto e = Declaration() )
63 return e;
64 lex = saved;
65 return TopLevelExpression();
66 }
67
68 AST Declaration() // returns null if it is not a declaration
69 {
70 /// Declaration ::=
71 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
72 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
73 /// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
74 /// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
75
76 auto pos = currentPosition();
77 Layer layer = "";
78 bool layerLiftDecl = false;
79
80 if( tryEat("@") )
81 {
82 layer = "@" ~ eatId("after @", AllowQuoted);
83 if( layer == "@@" )
84 {
85 layer = "@" ~ eatId("after @@", AllowQuoted);
86 layerLiftDecl = true;
87 }
88 else
89 {
90 if( tryEat("(") )
91 return null; // @lay(...) expression, not a declaration
92 }
93 }
94
95 // [TODO] Refactor
96 if( layerLiftDecl )
97 {
98 string kwd = "@" ~ layer;
99 string var = layer;
100
101 auto e = tryEat("(")
102 ? parseLambdaAfterOpenParen(pos) // let var ( ...
103 : (eat("=", "after "~kwd), E(0)); // let var = ...
104 if( moreDeclarationExists() )
105 return new Let(pos, var, LiftLayer, e, Body());
106 else
107 return new Let(pos, var, LiftLayer, e,
108 new Lay(pos, LiftLayer, new Var(pos, var))
109 );
110 }
111 else
112 {
113 string kwd = layer;
114 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
115 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
116
117 auto varpos = currentPosition();
118 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
119
120 auto e = tryEat("(")
121 ? parseLambdaAfterOpenParen(pos) // let var ( ...
122 : (eat("=", "after "~kwd), E(0)); // let var = ...
123 if( moreDeclarationExists() )
124 return new Let(pos, var, layer, e, Body());
125 else {
126 if( layer.empty )
127 return new Let(pos, var, layer, e, new Var(varpos, var));
128 else if( isMacroLayer(layer) )
129 return new Let(pos, var, layer, e, new Str(varpos, "(macro definition)"));
130 else
131 return new Let(pos, var, layer, e, new Lay(varpos, layer, new Var(varpos, var)));
132 }
133 }
134 }
135
136 AST TopLevelExpression()
137 {
138 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
139
140 auto pos = currentPosition();
141 auto e = E(0);
142 if( moreDeclarationExists() )
143 return new Let(pos, "_", "", e, Body());
144 else
145 return e;
146 }
147
148 private bool moreDeclarationExists()
149 {
150 return (tryEat(";") || tryEat("in")) && !closingBracket();
151 }
152
153 private bool closingBracket()
154 {
155 return lex.empty || !lex.front.quoted && ["}",")","]",","].canFind(lex.front.str);
156 }
157
158 // [TODO] make this customizable from program
159 private static string[][] operator_perferences = [
160 ["||"],
161 ["&&"],
162 ["!="],
163 ["=="],
164 ["<","<=",">",">="],
165 ["|"],
166 ["^"],
167 ["&"],
168 ["<<", ">>", "<<<", ">>>"],
169 ["+","-"],
170 ["~"],
171 ["*","/","%"],
172 ["^^","**"],
173 [".",".?"]
174 ];
175
176 AST E(size_t level)
177 {
178 /// Expression ::= (Binary left-associative operators over) Funcall
179
180 AST rec(AST lhs)
181 {
182 if( closingBracket() )
183 return lhs;
184
185 auto pos = currentPosition();
186 foreach(op; operator_perferences[level])
187 if( tryEat(op) )
188 if( op[0]=='.' )
189 return rec(
190 new App(lhs.pos, new Var(pos, op), lhs, parseId()));
191 else
192 return rec(
193 new App(lhs.pos, new Var(pos, op), lhs, E(level+1)));
194 return lhs;
195 }
196
197 if( operator_perferences.length <= level )
198 return Funcall();
199 else
200 return rec(E(level+1));
201 }
202
203 AST Funcall()
204 {
205 /// Funcall ::= BaseExpression ["(" Expression%"," ")" | "{" ENTRIES "}"]*
206
207 auto e = BaseExpression();
208 for(;;)
209 if( tryEat("(") )
210 {
211 auto pos = currentPosition();
212 AST[] args;
213 while( !tryEat(")") ) {
214 if( lex.empty )
215 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
216 args ~= E(0);
217 if( !tryEat(",") ) {
218 eat(")", "after function parameters");
219 break;
220 }
221 }
222 e = new App(e.pos, e, args);
223 }
224 else if( tryEat("{") )
225 {
226 e = parseTableSetAfterBrace(e);
227 }
228 else
229 break;
230 return e;
231 }
232
233 AST parseTableSetAfterBrace(AST e)
234 {
235 /// TableSet ::= "{" (ID ":" E) % "," "}"
236
237 if( tryEat("}") )
238 return e;
239 auto pos = currentPosition();
240 for(;;)
241 {
242 string key = eatId("for table key", AllowQuoted);
243 eat(":", "after table key");
244 AST val = E(0);
245 e = new App(pos, new Var(pos,".="),
246 e, new Str(pos,key), val);
247 if( !tryEat(",") )
248 {
249 eat("}", "for the end of table literal");
250 break;
251 }
252 }
253 return e;
254 }
255
256 AST BaseExpression()
257 {
258 if( lex.empty )
259 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
260
261 auto pos = currentPosition();
262 if( lex.front.quoted )
263 {
264 scope(exit) lex.popFront;
265 return new Str(pos, lex.front.str);
266 }
267 if( isNumber(lex.front.str) )
268 {
269 scope(exit) lex.popFront;
270 return new Int(pos, BigInt(cast(string)lex.front.str));
271 }
272 if( tryEat("...") )
273 {
274 return new Die(pos);
275 }
276 if( tryEat("@") )
277 {
278 auto lay = "@"~eatId("for layer ID");
279 eat("(", "for layered execution");
280 auto e = Body();
281 eat(")", "after "~lay~"(...");
282 return new Lay(pos, lay, e);
283 }
284 if( tryEat("(") )
285 {
286 auto e = Body();
287 eat(")", "after parenthesized expression");
288 return e;
289 }
290 if( tryEat("{") )
291 {
292 AST e = new App(pos, new Var(pos,"{}"));
293 return parseTableSetAfterBrace(e);
294 }
295 if( tryEat("if") )
296 {
297 return parseIfAfterIf(pos);
298 }
299 if( tryEat("case") )
300 {
301 return parsePatternMatch(pos);
302 }
303 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
304 {
305 eat("(", "after fun");
306 return parseLambdaAfterOpenParen(pos);
307 }
308 scope(exit) lex.popFront;
309 return new Var(pos, lex.front.str);
310 }
311
312 AST parseIfAfterIf(LexPosition pos)
313 {
314 auto cond = E(0);
315 auto thenPos = currentPosition();
316 if(!tryEat(":")) {
317 eat("then", "after if condition");
318 tryEat(":");
319 }
320 AST th = Body();
321 auto el = doNothingExpression();
322 auto elsePos = currentPosition();
323 if( tryEat("else") ) {
324 tryEat(":");
325 el = Body();
326 }
327 return new App(pos, new Var(pos,"if"), cond, new Fun(thenPos,[],th), new Fun(elsePos,[],el));
328 }
329
330 AST parsePatternMatch(LexPosition pos)
331 {
332 // case pmExpr CASES
333 //==>
334 // let pmVar = pmExpr in (... let pmTryFirst = ... in pmTryFirst())
335 AST pmExpr = E(0);
336 string pmVar = freshVarName();
337 string pmTryFirst = freshVarName();
338 AST pmBody = parsePatternMatchCases(pos, pmVar, pmTryFirst,
339 new App(pos, new Var(pos, pmTryFirst)));
340 return new Let(pos, pmVar, [], pmExpr, pmBody);
341 }
342
343 AST parsePatternMatchCases(LexPosition casePos, string pmVar, string tryThisBranchVar, AST thenDoThis)
344 {
345 // when pat: cBody
346 //==>
347 // ... let failBranchVar = ... in
348 // let tryThisBranchVar = fun(){ if(test){cBody}else{failBranchVar()} } in thenDoThis
349 if( tryEat("when") )
350 {
351 auto pos = currentPosition();
352 string failBranchVar = freshVarName();
353
354 auto pr = parsePattern();
355 eat(":", "after when pattern");
356 AST cBody = Body();
357 AST judgement = new App(pos, new Var(pos, "if"),
358 ppTest(pmVar, pr), new Fun(pos,[],ppBind(pmVar, pr, cBody)),
359 new Var(pos, failBranchVar));
360 return parsePatternMatchCases(casePos, pmVar, failBranchVar,
361 new Let(pos, tryThisBranchVar, [],
362 new Fun(pos,[],judgement), thenDoThis)
363 );
364 }
365 else
366 {
367 AST doNothing = new Fun(casePos,[], new Die(casePos));
368 return new Let(casePos, tryThisBranchVar, [], doNothing, thenDoThis);
369 }
370 }
371
372 // hageshiku tenuki
373 abstract class SinglePattern
374 {
375 string[] path;
376 mixin SimpleClass;
377 private AST access(string pmVar, string[] path) {
378 auto pos = currentPosition();
379 AST e = new Var(pos, pmVar);
380 foreach(p; path)
381 e = new App(pos, new Var(pos, "."), e, new Str(pos, p));
382 return e;
383 }
384 private AST has(AST e, string k) {
385 auto pos = currentPosition();
386 return opAndAnd(
387 new App(pos, new Var(pos, "_istbl"), e),
388 new App(pos, new Var(pos, ".?"), e, new Str(pos, k))
389 );
390 }
391 private AST opAndAnd(AST a, AST b) {
392 if( a is null ) return b;
393 if( b is null ) return a;
394 auto pos = currentPosition();
395 return new App(pos,
396 new Var(pos, "if"),
397 a,
398 new Fun(pos, [], b),
399 new Fun(pos, [], new Int(pos, 0))
400 );
401 }
402 AST ppTest(string pmVar) {
403 AST c = null;
404 for(int i=0; i<path.length; ++i)
405 c = opAndAnd(c, has(access(pmVar,path[0..i]), path[i]));
406 return c;
407 }
408 AST ppBind(string pmVar, AST thenDoThis) { return thenDoThis; }
409 }
410 class WildPattern : SinglePattern
411 {
412 mixin SimpleClass;
413 }
414 class VarPattern : SinglePattern
415 {
416 string name;
417 mixin SimpleClass;
418 AST ppBind(string pmVar, AST thenDoThis) {
419 auto pos = currentPosition();
420 return new Let(pos, name, [], access(pmVar,path), thenDoThis);
421 }
422 }
423 class ConstantPattern : SinglePattern
424 {
425 AST e;
426 mixin SimpleClass;
427 AST ppTest(string pmVar) {
428 auto pos = currentPosition();
429 return opAndAnd( super.ppTest(pmVar),
430 new App(pos, new Var(pos,"=="), access(pmVar,path), e)
431 );
432 }
433 }
434
435 SinglePattern[] parsePattern(string[] path = null)
436 {
437 SinglePattern[] result;
438 if( tryEat("{") )
439 {
440 if( !tryEat("}") ) {
441 do {
442 string key = eatId("in table pattern", AllowQuoted);
443 eat(":", "after field-id in table pattern");
444 result ~= parsePattern(path ~ key);
445 } while( tryEat(",") );
446 eat("}", "at the end of table pattern");
447 }
448 }
449 else
450 {
451 AST e = E(0);
452 if(auto ev = cast(Var)e)
453 if(ev.name == "_")
454 result ~= new WildPattern(path);
455 else
456 result ~= new VarPattern(path, ev.name);
457 else
458 result ~= new ConstantPattern(path, e);
459 }
460 return result;
461 }
462
463 AST ppTest(string pmVar, SinglePattern[] pats)
464 {
465 auto pos = currentPosition();
466 AST cond = null;
467 foreach(p; pats) {
468 AST c2 = p.ppTest(pmVar);
469 if( c2 !is null )
470 cond = cond is null ? c2
471 : new App(pos, new Var(pos,"&&"), cond, c2);
472 }
473 return cond is null ? new Int(currentPosition(), 1) : cond;
474 }
475
476 AST ppBind(string pmVar, SinglePattern[] pats, AST thenDoThis)
477 {
478 foreach(p; pats)
479 thenDoThis = p.ppBind(pmVar, thenDoThis);
480 return thenDoThis;
481 }
482
483 AST parseId()
484 {
485 scope(exit) lex.popFront;
486 return new Str(currentPosition(), lex.front.str);
487 }
488
489 AST parseLambdaAfterOpenParen(LexPosition pos)
490 {
491 Parameter[] params;
492 while( !tryEat(")") )
493 {
494 params ~= parseParam();
495 if( !tryEat(",") ) {
496 eat(")", "after function parameters");
497 break;
498 }
499 }
500 eat("{", "after function parameters");
501 auto funbody = Body();
502 eat("}", "after function body");
503 return new Fun(pos, params, funbody);
504 }
505
506 Parameter parseParam()
507 {
508 string var;
509 string[] lay;
510 while( !closingBracket() && !lex.empty && lex.front.str!="," )
511 {
512 auto pos = currentPosition();
513 string p = eatId("for function parameter", AllowQuoted);
514 if( p == "@" )
515 lay ~= "@" ~ eatId("after @", AllowQuoted);
516 else if( var.empty )
517 var = p;
518 else
519 throw genex!ParseException(pos, "one parameter has two names");
520 }
521 return new Parameter(var, lay);
522 }
523
524 private:
525 Lexer lex;
526 this(Lexer lex) { this.lex = lex; }
527
528 bool isNumber(string s)
529 {
530 return find!(`a<'0' || '9'<a`)(s).empty;
531 }
532
533 void eat(string kwd, lazy string msg)
534 {
535 if( !tryEat(kwd) )
536 if( lex.empty )
537 throw genex!UnexpectedEOF(
538 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
539 else
540 throw genex!ParseException(
541 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
542 }
543
544 bool tryEat(string kwd)
545 {
546 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
547 return false;
548 lex.popFront;
549 return true;
550 }
551
552 enum {AllowQuoted=true, DisallowQuoted=false};
553 string eatId(lazy string msg, bool aq=DisallowQuoted)
554 {
555 if( lex.empty )
556 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
557 if( !aq && lex.front.quoted )
558 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
559 scope(exit) lex.popFront;
560 return lex.front.str;
561 }
562
563 AST doNothingExpression()
564 {
565 return new Str(currentPosition(), "(empty function body)");
566 }
567
568 LexPosition currentPosition()
569 {
570 return lex.empty ? new LexPosition("EOF",0,0) : lex.front.pos;
571 }
572 }
573
574 unittest
575 {
576 mixin EasyAST;
577
578 assert_eq(parseString(`123`), intl(123));
579 assert_eq(parseString(`"foo"`), strl("foo"));
580 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
581 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
582 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
583 assert_eq(parseString("\u03BB(x,y){1}"), fun(["x","y"],intl(1)));
584 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
585 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
586 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
587 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
588 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
589 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),lay("@val",var("x"))));
590 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),lay("@typ",var("x"))));
591 assert_eq(parseString(`@macro x=1`), let("x","@macro",intl(1),strl("(macro definition)")));
592 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
593 assert_eq(parseString(`if 1 then 2`), call(var("if"),intl(1),fun([],intl(2)),fun([],strl("(empty function body)"))));
594 assert_eq(parseString(`if 1 then: 2 else(3)`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
595 assert_eq(parseString(`(if 1 then () else 3)()()`),
596 call(call(call(var("if"),intl(1),fun([],strl("(empty function body)")),fun([],intl(3))))));
597 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
598 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
599 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
600 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
601 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
602 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
603 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], strl("(empty function body)")));
604
605 assert_eq(parseString(`
606 let x = 100; #comment
607 let y = 200; #comment!!!!!
608 x+y
609 `),
610 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
611 );
612
613 assert_eq(parseString(`
614 var fac = fun(x){ if(x <= 1) then 1 else x*fac(x-1) };
615 fac(10)
616 `),
617 let("fac", "", fun(["x"],
618 call(var("if"),
619 call(var("<="), var("x"), intl(1)),
620 fun([], intl(1)),
621 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
622 )),
623 call(var("fac"),intl(10))
624 )
625 );
626 }
627
628 unittest
629 {
630 assert_throw!UnexpectedEOF(parseString(`1+`));
631 assert_throw!ParseException(parseString(`1+2}`));
632 assert_throw!UnexpectedEOF(parseString(`let "x"`));
633 assert_throw!UnexpectedEOF(parseString(`var`));
634 assert_throw!ParseException(parseString(`@val x ==`));
635 assert_throw!ParseException(parseString(`if(){1}`));
636 assert_throw!UnexpectedEOF(parseString(`f(`));
637 assert_throw!ParseException(parseString(`fun(x y){}`));
638 }
639
640 unittest
641 {
642 mixin EasyAST;
643 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
644 let("foo", "",
645 fun(["x"], call(var("+"), var("x"), intl(1))),
646 var("foo"))
647 );
648
649 assert_eq(parseString(`@@type ( x ) { x }`),
650 let("@type", LiftLayer, fun(["x"], var("x")), lay(LiftLayer, var("@type"))) );
651
652 assert_eq(parseString(`{}`), call(var("{}")));
653 assert_eq(parseString(`{foo:1,"bar":2}`),
654 call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
655 assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
656 assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
657 assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
658 }
659
660 unittest
661 {
662 assert_nothrow(parseString(`
663 case( 1 )
664 when(x): 1
665 `));
666 assert_nothrow(parseString(`
667 case 1
668 when {aaaa:_}: 1
669 `));
670 assert_nothrow(parseString(`
671 case 1
672 when {aaaa:@value(x)}: 1
673 when {aaaa:{bbb:_}, ccc:123}: 1
674 `));
675 }