1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.lex;
10 import polemy.ast;
11
12 /// Thrown when encountered a syntax error
13
14 class ParseException : Exception
15 {
16 mixin ExceptionWithPosition;
17 }
18
19 /// Parse a string and return its AST
20 /// Throws: ParseException, LexException, UnexpectedEOF
21
22 AST parseString(S, T...)(S str, T fn_ln_cn)
23 {
24 return parserFromString(str, fn_ln_cn).parse();
25 }
26
27 /// Parse the content of a file and return its AST
28 /// Throws: ParseException, LexException, UnexpectedEOF
29
30 AST parseFile(S, T...)(S filename, T ln_cn)
31 {
32 return parserFromFile(filename, ln_cn).parse();
33 }
34
35 // Named Constructors of Parser
36
37 private auto parserFromLexer(Lexer)(Lexer lex)
38 { return new Parser!Lexer(lex); }
39
40 private auto parserFromString(T...)(T params)
41 { return parserFromLexer(lexerFromString(params)); }
42
43 private auto parserFromFile(T...)(T params)
44 { return parserFromLexer(lexerFromFile(params)); }
45
46 // Parser
47
48 private class Parser(Lexer)
49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
50 {
51 AST parse()
52 {
53 auto e = Body();
54 if( !lex.empty )
55 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
56 return e;
57 }
58
59 AST Body()
60 {
61 /// Body ::= Declaration
62 /// | TopLevelExpression
63
64 if( closingBracket() )
65 return doNothingExpression();
66
67 auto saved = lex.save;
68 if( auto e = Declaration() )
69 return e;
70 lex = saved;
71 return TopLevelExpression();
72 }
73
74 AST Declaration() // returns null if it is not a declaration
75 {
76 /// Declaration ::=
77 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
78 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
79 /// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
80 /// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
81
82 auto pos = currentPosition();
83 string layer = "";
84 bool layerRiseDecl = false;
85
86 if( tryEat("@") )
87 {
88 layer = "@" ~ eatId("after @", AllowQuoted);
89 if( layer == "@@" )
90 {
91 layer = "@" ~ eatId("after @@", AllowQuoted);
92 layerRiseDecl = true;
93 }
94 else
95 {
96 if( tryEat("(") )
97 return null; // @lay(...) expression, not a declaration
98 }
99 }
100
101 // [TODO] Refactor
102 if( layerRiseDecl )
103 {
104 string kwd = "@" ~ layer;
105 string var = layer;
106
107 auto e = tryEat("(")
108 ? parseLambdaAfterOpenParen(pos) // let var ( ...
109 : (eat("=", "after "~kwd), E(0)); // let var = ...
110 if( moreDeclarationExists() )
111 return new LetExpression(pos, var, "(system)", e, Body());
112 else
113 return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
114 }
115 else
116 {
117 string kwd = layer;
118 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
119 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
120
121 auto varpos = currentPosition();
122 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
123
124 auto e = tryEat("(")
125 ? parseLambdaAfterOpenParen(varpos) // let var ( ...
126 : (eat("=", "after "~kwd), E(0)); // let var = ...
127 if( moreDeclarationExists() )
128 return new LetExpression(pos, var, layer, e, Body());
129 else
130 return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
131 }
132 }
133
134 AST TopLevelExpression()
135 {
136 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
137
138 auto pos = currentPosition();
139 auto e = E(0);
140 if( moreDeclarationExists() )
141 return new LetExpression(pos, "_", "", e, Body());
142 else
143 return e;
144 }
145
146 private bool moreDeclarationExists()
147 {
148 return (tryEat(";") || tryEat("in")) && !closingBracket();
149 }
150
151 private bool closingBracket()
152 {
153 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
154 }
155
156 // [TODO] make this customizable from program
157 private static string[][] operator_perferences = [
158 ["||"],
159 ["&&"],
160 ["!="],
161 ["=="],
162 ["<","<=",">",">="],
163 ["|"],
164 ["^"],
165 ["&"],
166 ["<<", ">>"],
167 ["+","-"],
168 ["~"],
169 ["*","/","%"],
170 ["^^","**"],
171 [".",".?"]
172 ];
173
174 AST E(size_t level)
175 {
176 /// Expression ::= (Binary left-associative operators over) Funcall
177
178 AST rec(AST lhs)
179 {
180 if( closingBracket() )
181 return lhs;
182
183 auto pos = currentPosition();
184 foreach(op; operator_perferences[level])
185 if( tryEat(op) )
186 if( op[0]=='.' )
187 return rec(
188 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, parseId()));
189 else
190 return rec(
191 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
192 return lhs;
193 }
194
195 if( operator_perferences.length <= level )
196 return Funcall();
197 else
198 return rec(E(level+1));
199 }
200
201 AST Funcall()
202 {
203 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
204
205 auto e = BaseExpression();
206 while( tryEat("(") )
207 {
208 auto pos = currentPosition();
209 AST[] args;
210 while( !tryEat(")") ) {
211 if( lex.empty )
212 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
213 args ~= E(0);
214 if( !tryEat(",") ) {
215 eat(")", "after function parameters");
216 break;
217 }
218 }
219 e = new FuncallExpression(e.pos, e, args);
220 }
221 return e;
222 }
223
224 AST BaseExpression()
225 {
226 if( lex.empty )
227 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
228
229 auto pos = lex.front.pos;
230 if( lex.front.quoted )
231 {
232 scope(exit) lex.popFront;
233 return new StrLiteral(pos, lex.front.str);
234 }
235 if( isNumber(lex.front.str) )
236 {
237 scope(exit) lex.popFront;
238 return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
239 }
240 if( tryEat("@") )
241 {
242 auto lay = "@"~eatId("for layer ID");
243 eat("(", "for layered execution");
244 auto e = Body();
245 eat(")", "after "~lay~"(...");
246 return new LayeredExpression(pos, lay, e);
247 }
248 if( tryEat("(") )
249 {
250 auto e = Body();
251 eat(")", "after parenthesized expression");
252 return e;
253 }
254 if( tryEat("{") )
255 {
256 AST e = new FuncallExpression(pos, new VarExpression(pos,"{}"));
257 if( tryEat("}") )
258 return e;
259 for(;;)
260 {
261 string key = eatId("for table key", AllowQuoted);
262 eat(":", "after table key");
263 AST val = E(0);
264 e = new FuncallExpression(pos, new VarExpression(pos,".="),
265 e, new StrLiteral(pos,key), val);
266 if( !tryEat(",") )
267 {
268 eat("}", "for the end of table literal");
269 break;
270 }
271 }
272 return e;
273 }
274 if( tryEat("if") )
275 {
276 eat("(", "after if");
277 auto cond = E(0);
278 eat(")", "after if condition");
279 auto thenPos = lex.front.pos;
280 eat("{", "after if condition");
281 auto th = Body();
282 eat("}", "after if-then body");
283 auto el = doNothingExpression();
284 auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
285 if( tryEat("else") ) {
286 eat("{", "after else");
287 el = Body();
288 eat("}", "after else body");
289 }
290 return new FuncallExpression(pos,
291 new VarExpression(pos, "if"),
292 cond,
293 new FunLiteral(thenPos, [], th),
294 new FunLiteral(elsePos, [], el)
295 );
296 }
297 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
298 {
299 eat("(", "after fun");
300 return parseLambdaAfterOpenParen(pos);
301 }
302 scope(exit) lex.popFront;
303 return new VarExpression(pos, lex.front.str);
304 }
305
306 AST parseId()
307 {
308 scope(exit) lex.popFront;
309 return new StrLiteral(currentPosition(), lex.front.str);
310 }
311
312 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
313 {
314 Parameter[] params;
315 while( !tryEat(")") )
316 {
317 params ~= parseParam();
318 if( !tryEat(",") ) {
319 eat(")", "after function parameters");
320 break;
321 }
322 }
323 eat("{", "after function parameters");
324 auto funbody = Body();
325 eat("}", "after function body");
326 return new FunLiteral(pos, params, funbody);
327 }
328
329 Parameter parseParam()
330 {
331 string var;
332 string[] lay;
333 while( !closingBracket() && !lex.empty && lex.front.str!="," )
334 {
335 auto pos = currentPosition();
336 string p = eatId("for function parameter", AllowQuoted);
337 if( p == "@" )
338 lay ~= "@" ~ eatId("after @", AllowQuoted);
339 else if( var.empty )
340 var = p;
341 else
342 throw genex!ParseException(pos, "one parameter has two names");
343 }
344 return new Parameter(var, lay);
345 }
346
347 private:
348 Lexer lex;
349 this(Lexer lex) { this.lex = lex; }
350
351 bool isNumber(string s)
352 {
353 return find!(`a<'0' || '9'<a`)(s).empty;
354 }
355
356 void eat(string kwd, lazy string msg)
357 {
358 if( !tryEat(kwd) )
359 if( lex.empty )
360 throw genex!UnexpectedEOF(
361 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
362 else
363 throw genex!ParseException(
364 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
365 }
366
367 bool tryEat(string kwd)
368 {
369 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
370 return false;
371 lex.popFront;
372 return true;
373 }
374
375 enum {AllowQuoted=true, DisallowQuoted=false};
376 string eatId(lazy string msg, bool aq=DisallowQuoted)
377 {
378 if( lex.empty )
379 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
380 if( !aq && lex.front.quoted )
381 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
382 scope(exit) lex.popFront;
383 return lex.front.str;
384 }
385
386 AST doNothingExpression()
387 {
388 return new IntLiteral(currentPosition(), BigInt(178));
389 }
390
391 immutable(LexPosition) currentPosition()
392 {
393 return lex.empty ? null : lex.front.pos;
394 }
395 }
396
397 unittest
398 {
399 mixin EasyAST;
400
401 assert_eq(parseString(`123`), intl(123));
402 assert_eq(parseString(`"foo"`), strl("foo"));
403 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
404 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
405 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
406 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
407 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
408 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
409 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
410 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
411 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
412 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
413 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
414 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
415 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
416 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
417 assert_eq(parseString(`if(1){}else{3}()()`),
418 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
419 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
420 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
421 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
422 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
423 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
424 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
425 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
426
427 assert_eq(parseString(`
428 let x = 100; #comment
429 let y = 200; #comment!!!!!
430 x+y
431 `),
432 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
433 );
434
435 assert_eq(parseString(`
436 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
437 fac(10)
438 `),
439 let("fac", "", fun(["x"],
440 call(var("if"),
441 call(var("<="), var("x"), intl(1)),
442 fun([], intl(1)),
443 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
444 )),
445 call(var("fac"),intl(10))
446 )
447 );
448 }
449
450 unittest
451 {
452 assert_throw!UnexpectedEOF(parseString(`1+`));
453 assert_throw!ParseException(parseString(`1+2}`));
454 assert_throw!UnexpectedEOF(parseString(`let "x"`));
455 assert_throw!UnexpectedEOF(parseString(`var`));
456 assert_throw!ParseException(parseString(`@val x ==`));
457 assert_throw!ParseException(parseString(`if(){1}`));
458 assert_throw!UnexpectedEOF(parseString(`f(`));
459 }
460
461 unittest
462 {
463 mixin EasyAST;
464 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
465 let("foo", "",
466 fun(["x"], call(var("+"), var("x"), intl(1))),
467 var("foo"))
468 );
469
470 assert_eq(parseString(`@@type ( x ) { x }`),
471 let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
472
473 assert_eq(parseString(`{}`), call(var("{}")));
474 assert_eq(parseString(`{foo:1,"bar":2}`),
475 call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
476 assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
477 assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
478 }