1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.lex;
10 import polemy.ast;
11
12 /// Thrown when encountered a syntax error
13
14 class ParseException : Exception
15 {
16 mixin ExceptionWithPosition;
17 }
18
19 /// Parse a string and return its AST
20 /// Throws: ParseException, LexException, UnexpectedEOF
21
22 AST parseString(S, T...)(S str, T fn_ln_cn)
23 {
24 return parserFromString(str, fn_ln_cn).parse();
25 }
26
27 /// Parse the content of a file and return its AST
28 /// Throws: ParseException, LexException, UnexpectedEOF
29
30 AST parseFile(S, T...)(S filename, T ln_cn)
31 {
32 return parserFromFile(filename, ln_cn).parse();
33 }
34
35 // Named Constructors of Parser
36
37 private auto parserFromLexer(Lexer)(Lexer lex)
38 { return new Parser!Lexer(lex); }
39
40 private auto parserFromString(T...)(T params)
41 { return parserFromLexer(lexerFromString(params)); }
42
43 private auto parserFromFile(T...)(T params)
44 { return parserFromLexer(lexerFromFile(params)); }
45
46 // Parser
47
48 private class Parser(Lexer)
49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
50 {
51 AST parse()
52 {
53 auto e = Body();
54 if( !lex.empty )
55 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
56 return e;
57 }
58
59 AST Body()
60 {
61 /// Body ::= Declaration
62 /// | TopLevelExpression
63
64 if( closingBracket() )
65 return doNothingExpression();
66
67 auto saved = lex.save;
68 if( auto e = Declaration() )
69 return e;
70 lex = saved;
71 return TopLevelExpression();
72 }
73
74 AST Declaration() // returns null if it is not a declaration
75 {
76 /// Declaration ::=
77 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
78 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
79 /// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
80 /// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
81
82 auto pos = currentPosition();
83 string layer = "";
84 bool layerRiseDecl = false;
85
86 if( tryEat("@") )
87 {
88 layer = "@" ~ eatId("after @", AllowQuoted);
89 if( layer == "@@" )
90 {
91 layer = "@" ~ eatId("after @@", AllowQuoted);
92 layerRiseDecl = true;
93 }
94 else
95 {
96 if( tryEat("(") )
97 return null; // @lay(...) expression, not a declaration
98 }
99 }
100
101 // [TODO] Refactor
102 if( layerRiseDecl )
103 {
104 string kwd = "@" ~ layer;
105 string var = layer;
106
107 auto e = tryEat("(")
108 ? parseLambdaAfterOpenParen(pos) // let var ( ...
109 : (eat("=", "after "~kwd), E(0)); // let var = ...
110 if( moreDeclarationExists() )
111 return new LetExpression(pos, var, "(system)", e, Body());
112 else
113 return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
114 }
115 else
116 {
117 string kwd = layer;
118 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
119 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
120
121 auto varpos = currentPosition();
122 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
123
124 auto e = tryEat("(")
125 ? parseLambdaAfterOpenParen(varpos) // let var ( ...
126 : (eat("=", "after "~kwd), E(0)); // let var = ...
127 if( moreDeclarationExists() )
128 return new LetExpression(pos, var, layer, e, Body());
129 else
130 return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
131 }
132 }
133
134 AST TopLevelExpression()
135 {
136 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
137
138 auto pos = currentPosition();
139 auto e = E(0);
140 if( moreDeclarationExists() )
141 return new LetExpression(pos, "_", "", e, Body());
142 else
143 return e;
144 }
145
146 private bool moreDeclarationExists()
147 {
148 return (tryEat(";") || tryEat("in")) && !closingBracket();
149 }
150
151 private bool closingBracket()
152 {
153 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
154 }
155
156 // [TODO] make this customizable from program
157 private static string[][] operator_perferences = [
158 ["||"],
159 ["&&"],
160 ["!="],
161 ["=="],
162 ["<","<=",">",">="],
163 ["|"],
164 ["^"],
165 ["&"],
166 ["<<", ">>"],
167 ["+","-"],
168 ["~"],
169 ["*","/","%"],
170 ["^^","**"]
171 ];
172
173 AST E(size_t level)
174 {
175 /// Expression ::= (Binary left-associative operators over) Funcall
176
177 AST rec(AST lhs)
178 {
179 if( closingBracket() )
180 return lhs;
181
182 auto pos = currentPosition();
183 foreach(op; operator_perferences[level])
184 if( tryEat(op) )
185 return rec(
186 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
187 return lhs;
188 }
189
190 if( operator_perferences.length <= level )
191 return Funcall();
192 else
193 return rec(E(level+1));
194 }
195
196 AST Funcall()
197 {
198 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
199
200 auto e = BaseExpression();
201 while( tryEat("(") )
202 {
203 auto pos = currentPosition();
204 AST[] args;
205 while( !tryEat(")") ) {
206 if( lex.empty )
207 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
208 args ~= E(0);
209 if( !tryEat(",") ) {
210 eat(")", "after function parameters");
211 break;
212 }
213 }
214 e = new FuncallExpression(e.pos, e, args);
215 }
216 return e;
217 }
218
219 AST BaseExpression()
220 {
221 if( lex.empty )
222 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
223
224 auto pos = lex.front.pos;
225 if( lex.front.quoted )
226 {
227 scope(exit) lex.popFront;
228 return new StrLiteral(pos, lex.front.str);
229 }
230 if( isNumber(lex.front.str) )
231 {
232 scope(exit) lex.popFront;
233 return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
234 }
235 if( tryEat("@") )
236 {
237 auto lay = "@"~eatId("for layer ID");
238 eat("(", "for layered execution");
239 auto e = Body();
240 eat(")", "after "~lay~"(...");
241 return new LayeredExpression(pos, lay, e);
242 }
243 if( tryEat("(") )
244 {
245 auto e = Body();
246 eat(")", "after parenthesized expression");
247 return e;
248 }
249 if( tryEat("if") )
250 {
251 eat("(", "after if");
252 auto cond = E(0);
253 eat(")", "after if condition");
254 auto thenPos = lex.front.pos;
255 eat("{", "after if condition");
256 auto th = Body();
257 eat("}", "after if-then body");
258 auto el = doNothingExpression();
259 auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
260 if( tryEat("else") ) {
261 eat("{", "after else");
262 el = Body();
263 eat("}", "after else body");
264 }
265 return new FuncallExpression(pos,
266 new VarExpression(pos, "if"),
267 cond,
268 new FunLiteral(thenPos, [], th),
269 new FunLiteral(elsePos, [], el)
270 );
271 }
272 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
273 {
274 eat("(", "after fun");
275 return parseLambdaAfterOpenParen(pos);
276 }
277 scope(exit) lex.popFront;
278 return new VarExpression(pos, lex.front.str);
279 }
280
281 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
282 {
283 Parameter[] params;
284 while( !tryEat(")") )
285 {
286 params ~= parseParam();
287 if( !tryEat(",") ) {
288 eat(")", "after function parameters");
289 break;
290 }
291 }
292 eat("{", "after function parameters");
293 auto funbody = Body();
294 eat("}", "after function body");
295 return new FunLiteral(pos, params, funbody);
296 }
297
298 Parameter parseParam()
299 {
300 string var;
301 string[] lay;
302 while( !closingBracket() && !lex.empty && lex.front.str!="," )
303 {
304 auto pos = currentPosition();
305 string p = eatId("for function parameter", AllowQuoted);
306 if( p == "@" )
307 lay ~= "@" ~ eatId("after @", AllowQuoted);
308 else if( var.empty )
309 var = p;
310 else
311 throw genex!ParseException(pos, "one parameter has two names");
312 }
313 return new Parameter(var, lay);
314 }
315
316 private:
317 Lexer lex;
318 this(Lexer lex) { this.lex = lex; }
319
320 bool isNumber(string s)
321 {
322 return find!(`a<'0' || '9'<a`)(s).empty;
323 }
324
325 void eat(string kwd, lazy string msg)
326 {
327 if( !tryEat(kwd) )
328 if( lex.empty )
329 throw genex!UnexpectedEOF(
330 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
331 else
332 throw genex!ParseException(
333 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
334 }
335
336 bool tryEat(string kwd)
337 {
338 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
339 return false;
340 lex.popFront;
341 return true;
342 }
343
344 enum {AllowQuoted=true, DisallowQuoted=false};
345 string eatId(lazy string msg, bool aq=DisallowQuoted)
346 {
347 if( lex.empty )
348 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
349 if( !aq && lex.front.quoted )
350 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
351 scope(exit) lex.popFront;
352 return lex.front.str;
353 }
354
355 AST doNothingExpression()
356 {
357 return new IntLiteral(currentPosition(), BigInt(178));
358 }
359
360 immutable(LexPosition) currentPosition()
361 {
362 return lex.empty ? null : lex.front.pos;
363 }
364 }
365
366 unittest
367 {
368 mixin EasyAST;
369
370 assert_eq(parseString(`123`), intl(123));
371 assert_eq(parseString(`"foo"`), strl("foo"));
372 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
373 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
374 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
375 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
376 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
377 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
378 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
379 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
380 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
381 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
382 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
383 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
384 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
385 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
386 assert_eq(parseString(`if(1){}else{3}()()`),
387 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
388 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
389 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
390 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
391 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
392 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
393 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
394 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
395
396 assert_eq(parseString(`
397 let x = 100; #comment
398 let y = 200; #comment!!!!!
399 x+y
400 `),
401 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
402 );
403
404 assert_eq(parseString(`
405 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
406 fac(10)
407 `),
408 let("fac", "", fun(["x"],
409 call(var("if"),
410 call(var("<="), var("x"), intl(1)),
411 fun([], intl(1)),
412 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
413 )),
414 call(var("fac"),intl(10))
415 )
416 );
417 }
418
419 unittest
420 {
421 assert_throw!UnexpectedEOF(parseString(`1+`));
422 assert_throw!ParseException(parseString(`1+2}`));
423 assert_throw!UnexpectedEOF(parseString(`let "x"`));
424 assert_throw!UnexpectedEOF(parseString(`var`));
425 assert_throw!ParseException(parseString(`@val x ==`));
426 assert_throw!ParseException(parseString(`if(){1}`));
427 assert_throw!UnexpectedEOF(parseString(`f(`));
428 }
429
430 unittest
431 {
432 mixin EasyAST;
433 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
434 let("foo", "",
435 fun(["x"], call(var("+"), var("x"), intl(1))),
436 var("foo"))
437 );
438
439 assert_eq(parseString(`@@type ( x ) { x }`),
440 let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
441 }