1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.failure;
10 import polemy.lex;
11 import polemy.ast;
12 import polemy.layer;
13 import polemy.fresh;
14
15 /// Parse a string and return its AST
16
17 AST parseString(S, T...)(S str, T fn_ln_cn)
18 {
19 return parserFromString(str, fn_ln_cn).parse();
20 }
21
22 /// Parse the content of a file and return its AST
23
24 AST parseFile(S, T...)(S filename, T ln_cn)
25 {
26 return parserFromFile(filename, ln_cn).parse();
27 }
28
29 // Named Constructors of Parser
30
31 private auto parserFromLexer(Lexer)(Lexer lex)
32 { return new Parser!Lexer(lex); }
33
34 private auto parserFromString(T...)(T params)
35 { return parserFromLexer(lexerFromString(params)); }
36
37 private auto parserFromFile(T...)(T params)
38 { return parserFromLexer(lexerFromFile(params)); }
39
40 // Parser
41
42 private class Parser(Lexer)
43 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
44 {
45 AST parse()
46 {
47 auto e = Body();
48 if( !lex.empty )
49 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
50 return e;
51 }
52
53 AST Body()
54 {
55 /// Body ::= Declaration
56 /// | TopLevelExpression
57
58 if( closingBracket() )
59 return doNothingExpression();
60
61 auto saved = lex.save;
62 if( auto e = Declaration() )
63 return e;
64 lex = saved;
65 return TopLevelExpression();
66 }
67
68 AST Declaration() // returns null if it is not a declaration
69 {
70 /// Declaration ::=
71 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
72 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
73 /// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
74 /// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
75
76 auto pos = currentPosition();
77 Layer layer = "";
78 bool layerLiftDecl = false;
79
80 if( tryEat("@") )
81 {
82 layer = "@" ~ eatId("after @", AllowQuoted);
83 if( layer == "@@" )
84 {
85 layer = "@" ~ eatId("after @@", AllowQuoted);
86 layerLiftDecl = true;
87 }
88 else
89 {
90 if( tryEat("(") )
91 return null; // @lay(...) expression, not a declaration
92 }
93 }
94
95 // [TODO] Refactor
96 if( layerLiftDecl )
97 {
98 string kwd = "@" ~ layer;
99 string var = layer;
100
101 auto e = tryEat("(")
102 ? parseLambdaAfterOpenParen(pos) // let var ( ...
103 : (eat("=", "after "~kwd), E(0)); // let var = ...
104 if( moreDeclarationExists() )
105 return new Let(pos, var, SystemLayer, e, Body());
106 else
107 return new Let(pos, var, SystemLayer, e,
108 new Lay(pos, SystemLayer, new Var(pos, var))
109 );
110 }
111 else
112 {
113 string kwd = layer;
114 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
115 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
116
117 auto varpos = currentPosition();
118 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
119
120 auto e = tryEat("(")
121 ? parseLambdaAfterOpenParen(pos) // let var ( ...
122 : (eat("=", "after "~kwd), E(0)); // let var = ...
123 if( moreDeclarationExists() )
124 return new Let(pos, var, layer, e, Body());
125 else
126 return new Let(pos, var, layer, e, new Var(varpos, var));
127 }
128 }
129
130 AST TopLevelExpression()
131 {
132 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
133
134 auto pos = currentPosition();
135 auto e = E(0);
136 if( moreDeclarationExists() )
137 return new Let(pos, "_", "", e, Body());
138 else
139 return e;
140 }
141
142 private bool moreDeclarationExists()
143 {
144 return (tryEat(";") || tryEat("in")) && !closingBracket();
145 }
146
147 private bool closingBracket()
148 {
149 return lex.empty || !lex.front.quoted && ["}",")","]",","].canFind(lex.front.str);
150 }
151
152 // [TODO] make this customizable from program
153 private static string[][] operator_perferences = [
154 ["||"],
155 ["&&"],
156 ["!="],
157 ["=="],
158 ["<","<=",">",">="],
159 ["|"],
160 ["^"],
161 ["&"],
162 ["<<", ">>", "<<<", ">>>"],
163 ["+","-"],
164 ["~"],
165 ["*","/","%"],
166 ["^^","**"],
167 [".",".?"]
168 ];
169
170 AST E(size_t level)
171 {
172 /// Expression ::= (Binary left-associative operators over) Funcall
173
174 AST rec(AST lhs)
175 {
176 if( closingBracket() )
177 return lhs;
178
179 auto pos = currentPosition();
180 foreach(op; operator_perferences[level])
181 if( tryEat(op) )
182 if( op[0]=='.' )
183 return rec(
184 new App(lhs.pos, new Var(pos, op), lhs, parseId()));
185 else
186 return rec(
187 new App(lhs.pos, new Var(pos, op), lhs, E(level+1)));
188 return lhs;
189 }
190
191 if( operator_perferences.length <= level )
192 return Funcall();
193 else
194 return rec(E(level+1));
195 }
196
197 AST Funcall()
198 {
199 /// Funcall ::= BaseExpression ["(" Expression%"," ")" | "{" ENTRIES "}"]*
200
201 auto e = BaseExpression();
202 for(;;)
203 if( tryEat("(") )
204 {
205 auto pos = currentPosition();
206 AST[] args;
207 while( !tryEat(")") ) {
208 if( lex.empty )
209 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
210 args ~= E(0);
211 if( !tryEat(",") ) {
212 eat(")", "after function parameters");
213 break;
214 }
215 }
216 e = new App(e.pos, e, args);
217 }
218 else if( tryEat("{") )
219 {
220 e = parseTableSetAfterBrace(e);
221 }
222 else
223 break;
224 return e;
225 }
226
227 AST parseTableSetAfterBrace(AST e)
228 {
229 /// TableSet ::= "{" (ID ":" E) % "," "}"
230
231 if( tryEat("}") )
232 return e;
233 auto pos = currentPosition();
234 for(;;)
235 {
236 string key = eatId("for table key", AllowQuoted);
237 eat(":", "after table key");
238 AST val = E(0);
239 e = new App(pos, new Var(pos,".="),
240 e, new Str(pos,key), val);
241 if( !tryEat(",") )
242 {
243 eat("}", "for the end of table literal");
244 break;
245 }
246 }
247 return e;
248 }
249
250 AST BaseExpression()
251 {
252 if( lex.empty )
253 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
254
255 auto pos = lex.front.pos;
256 if( lex.front.quoted )
257 {
258 scope(exit) lex.popFront;
259 return new Str(pos, lex.front.str);
260 }
261 if( isNumber(lex.front.str) )
262 {
263 scope(exit) lex.popFront;
264 return new Int(pos, BigInt(cast(string)lex.front.str));
265 }
266 if( tryEat("@") )
267 {
268 auto lay = "@"~eatId("for layer ID");
269 eat("(", "for layered execution");
270 auto e = Body();
271 eat(")", "after "~lay~"(...");
272 return new Lay(pos, lay, e);
273 }
274 if( tryEat("(") )
275 {
276 auto e = Body();
277 eat(")", "after parenthesized expression");
278 return e;
279 }
280 if( tryEat("{") )
281 {
282 AST e = new App(pos, new Var(pos,"{}"));
283 return parseTableSetAfterBrace(e);
284 }
285 if( tryEat("if") )
286 {
287 return parseIfAfterIf(pos);
288 }
289 if( tryEat("case") )
290 {
291 return parsePatternMatch(pos);
292 }
293 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
294 {
295 eat("(", "after fun");
296 return parseLambdaAfterOpenParen(pos);
297 }
298 scope(exit) lex.popFront;
299 return new Var(pos, lex.front.str);
300 }
301
302 AST parseIfAfterIf(LexPosition pos)
303 {
304 eat("(", "after if");
305 auto cond = E(0);
306 eat(")", "after if condition");
307 auto thenPos = lex.front.pos;
308 AST th;
309 if( tryEat("{") ) {
310 th = Body();
311 eat("}", "after if-then body");
312 } else {
313 th = E(0);
314 }
315 auto el = doNothingExpression();
316 auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
317 if( tryEat("else") )
318 if( tryEat("{") ) {
319 el = Body();
320 eat("}", "after else body");
321 } else {
322 el = E(0);
323 }
324 return new App(pos, new Var(pos,"if"), cond, new Fun(thenPos,[],th), new Fun(elsePos,[],el));
325 }
326
327 AST parsePatternMatch(LexPosition pos)
328 {
329 // case "(" pmExpr ")" CASES
330 //==>
331 // let pmVar = pmExpr in (... let pmTryFirst = ... in pmTryFirst())
332 eat("(", "after case");
333 AST pmExpr = E(0);
334 eat(")", "after case");
335 string pmVar = freshVarName();
336 string pmTryFirst = freshVarName();
337 AST pmBody = parsePatternMatchCases(pmVar, pmTryFirst,
338 new App(pos, new Var(pos, pmTryFirst)));
339 return new Let(pos, pmVar, [], pmExpr, pmBody);
340 }
341
342 AST parsePatternMatchCases(string pmVar, string tryThisBranchVar, AST thenDoThis)
343 {
344 // when( pat ) { cBody }
345 //==>
346 // ... let failBranchVar = ... in
347 // let tryThisBranchVar = fun(){ if(test){cBody}else{failBranchVar()} } in thenDoThis
348 if( tryEat("when") )
349 {
350 auto pos = currentPosition();
351 string failBranchVar = freshVarName();
352
353 eat("(", "after when");
354 auto pr = parsePattern();
355 eat(")", "after when");
356 eat("{", "after pattern");
357 AST cBody = Body();
358 AST judgement = new App(pos, new Var(pos, "if"),
359 ppTest(pmVar, pr), new Fun(pos,[],ppBind(pmVar, pr, cBody)),
360 new Var(pos, failBranchVar));
361 eat("}", "after pattern clause");
362 return parsePatternMatchCases(pmVar, failBranchVar,
363 new Let(pos, tryThisBranchVar, [],
364 new Fun(pos,[],judgement), thenDoThis)
365 );
366 }
367 else
368 {
369 auto pos = currentPosition();
370 AST doNothing = new Fun(pos,[],
371 new Str(pos, sprintf!"(pattern match failure:%s)"(pos)));
372 return new Let(currentPosition(), tryThisBranchVar, [], doNothing, thenDoThis);
373 }
374 }
375
376 // hageshiku tenuki
377 abstract class SinglePattern
378 {
379 string[] path;
380 mixin SimpleClass;
381 private AST access(string pmVar, string[] path) {
382 auto pos = currentPosition();
383 AST e = new Var(pos, pmVar);
384 foreach(p; path)
385 e = new App(pos, new Var(pos, "."), e, new Str(pos, p));
386 return e;
387 }
388 private AST has(AST e, string k) {
389 auto pos = currentPosition();
390 return opAndAnd(
391 new App(pos, new Var(pos, "_istable"), e),
392 new App(pos, new Var(pos, ".?"), e, new Str(pos, k))
393 );
394 }
395 private AST opAndAnd(AST a, AST b) {
396 if( a is null ) return b;
397 if( b is null ) return a;
398 auto pos = currentPosition();
399 return new App(pos,
400 new Var(pos, "if"),
401 a,
402 new Fun(pos, [], b),
403 new Fun(pos, [], new Int(pos, 0))
404 );
405 }
406 AST ppTest(string pmVar) {
407 AST c = null;
408 for(int i=0; i<path.length; ++i)
409 c = opAndAnd(c, has(access(pmVar,path[0..i]), path[i]));
410 return c;
411 }
412 AST ppBind(string pmVar, AST thenDoThis) { return thenDoThis; }
413 }
414 class WildPattern : SinglePattern
415 {
416 mixin SimpleClass;
417 }
418 class VarPattern : SinglePattern
419 {
420 string name;
421 mixin SimpleClass;
422 AST ppBind(string pmVar, AST thenDoThis) {
423 auto pos = currentPosition();
424 return new Let(pos, name, [], access(pmVar,path), thenDoThis);
425 }
426 }
427 class ConstantPattern : SinglePattern
428 {
429 AST e;
430 mixin SimpleClass;
431 AST ppTest(string pmVar) {
432 auto pos = currentPosition();
433 return opAndAnd( super.ppTest(pmVar),
434 new App(pos, new Var(pos,"=="), access(pmVar,path), e)
435 );
436 }
437 }
438
439 SinglePattern[] parsePattern(string[] path = null)
440 {
441 SinglePattern[] result;
442 if( tryEat("{") )
443 {
444 if( !tryEat("}") ) {
445 do {
446 string key = eatId("in table pattern", AllowQuoted);
447 eat(":", "after field-id in table pattern");
448 result ~= parsePattern(path ~ key);
449 } while( tryEat(",") );
450 eat("}", "at the end of table pattern");
451 }
452 }
453 else
454 {
455 AST e = E(0);
456 if(auto ev = cast(Var)e)
457 if(ev.name == "_")
458 result ~= new WildPattern(path);
459 else
460 result ~= new VarPattern(path, ev.name);
461 else
462 result ~= new ConstantPattern(path, e);
463 }
464 return result;
465 }
466
467 AST ppTest(string pmVar, SinglePattern[] pats)
468 {
469 auto pos = currentPosition();
470 AST cond = null;
471 foreach(p; pats) {
472 AST c2 = p.ppTest(pmVar);
473 if( c2 !is null )
474 cond = cond is null ? c2
475 : new App(pos, new Var(pos,"&&"), cond, c2);
476 }
477 return cond is null ? new Int(currentPosition(), 1) : cond;
478 }
479
480 AST ppBind(string pmVar, SinglePattern[] pats, AST thenDoThis)
481 {
482 foreach(p; pats)
483 thenDoThis = p.ppBind(pmVar, thenDoThis);
484 return thenDoThis;
485 }
486
487 AST parseId()
488 {
489 scope(exit) lex.popFront;
490 return new Str(currentPosition(), lex.front.str);
491 }
492
493 AST parseLambdaAfterOpenParen(LexPosition pos)
494 {
495 Parameter[] params;
496 while( !tryEat(")") )
497 {
498 params ~= parseParam();
499 if( !tryEat(",") ) {
500 eat(")", "after function parameters");
501 break;
502 }
503 }
504 eat("{", "after function parameters");
505 auto funbody = Body();
506 eat("}", "after function body");
507 return new Fun(pos, params, funbody);
508 }
509
510 Parameter parseParam()
511 {
512 string var;
513 string[] lay;
514 while( !closingBracket() && !lex.empty && lex.front.str!="," )
515 {
516 auto pos = currentPosition();
517 string p = eatId("for function parameter", AllowQuoted);
518 if( p == "@" )
519 lay ~= "@" ~ eatId("after @", AllowQuoted);
520 else if( var.empty )
521 var = p;
522 else
523 throw genex!ParseException(pos, "one parameter has two names");
524 }
525 return new Parameter(var, lay);
526 }
527
528 private:
529 Lexer lex;
530 this(Lexer lex) { this.lex = lex; }
531
532 bool isNumber(string s)
533 {
534 return find!(`a<'0' || '9'<a`)(s).empty;
535 }
536
537 void eat(string kwd, lazy string msg)
538 {
539 if( !tryEat(kwd) )
540 if( lex.empty )
541 throw genex!UnexpectedEOF(
542 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
543 else
544 throw genex!ParseException(
545 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
546 }
547
548 bool tryEat(string kwd)
549 {
550 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
551 return false;
552 lex.popFront;
553 return true;
554 }
555
556 enum {AllowQuoted=true, DisallowQuoted=false};
557 string eatId(lazy string msg, bool aq=DisallowQuoted)
558 {
559 if( lex.empty )
560 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
561 if( !aq && lex.front.quoted )
562 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
563 scope(exit) lex.popFront;
564 return lex.front.str;
565 }
566
567 AST doNothingExpression()
568 {
569 return new Str(currentPosition(), "(empty function body)");
570 }
571
572 LexPosition currentPosition()
573 {
574 return lex.empty ? null : lex.front.pos;
575 }
576 }
577
578 unittest
579 {
580 mixin EasyAST;
581
582 assert_eq(parseString(`123`), intl(123));
583 assert_eq(parseString(`"foo"`), strl("foo"));
584 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
585 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
586 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
587 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
588 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
589 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
590 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
591 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
592 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
593 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
594 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
595 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
596 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],strl("(empty function body)"))));
597 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
598 assert_eq(parseString(`if(1){}else{3}()()`),
599 call(call(call(var("if"),intl(1),fun([],strl("(empty function body)")),fun([],intl(3))))));
600 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
601 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
602 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
603 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
604 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
605 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
606 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], strl("(empty function body)")));
607
608 assert_eq(parseString(`
609 let x = 100; #comment
610 let y = 200; #comment!!!!!
611 x+y
612 `),
613 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
614 );
615
616 assert_eq(parseString(`
617 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
618 fac(10)
619 `),
620 let("fac", "", fun(["x"],
621 call(var("if"),
622 call(var("<="), var("x"), intl(1)),
623 fun([], intl(1)),
624 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
625 )),
626 call(var("fac"),intl(10))
627 )
628 );
629 }
630
631 unittest
632 {
633 assert_throw!UnexpectedEOF(parseString(`1+`));
634 assert_throw!ParseException(parseString(`1+2}`));
635 assert_throw!UnexpectedEOF(parseString(`let "x"`));
636 assert_throw!UnexpectedEOF(parseString(`var`));
637 assert_throw!ParseException(parseString(`@val x ==`));
638 assert_throw!ParseException(parseString(`if(){1}`));
639 assert_throw!UnexpectedEOF(parseString(`f(`));
640 }
641
642 unittest
643 {
644 mixin EasyAST;
645 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
646 let("foo", "",
647 fun(["x"], call(var("+"), var("x"), intl(1))),
648 var("foo"))
649 );
650
651 assert_eq(parseString(`@@type ( x ) { x }`),
652 let("@type", SystemLayer, fun(["x"], var("x")), lay(SystemLayer, var("@type"))) );
653
654 assert_eq(parseString(`{}`), call(var("{}")));
655 assert_eq(parseString(`{foo:1,"bar":2}`),
656 call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
657 assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
658 assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
659 assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
660 }
661
662 unittest
663 {
664 assert_nothrow(parseString(`
665 case( 1 )
666 when(x){1}
667 `));
668 assert_nothrow(parseString(`
669 case( 1 )
670 when({aaaa:_}){1}
671 `));
672 assert_nothrow(parseString(`
673 case( 1 )
674 when({aaaa:@value(x)}){1}
675 when({aaaa:{bbb:_}, ccc:123}){1}
676 `));
677 }
678
679 unittest
680 {
681 // test for omitting { .. }
682 assert_nothrow(parseString(`
683 if(1) 2 else 3
684 `));
685 assert_nothrow(parseString(`
686 if(1) x{y:z} else 3
687 `));
688 }