1 /**
2 * Authors: k.inaba
3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
4 *
5 * Parser for Polemy programming language
6 */
7 module polemy.parse;
8 import polemy._common;
9 import polemy.lex;
10 import polemy.ast;
11
12 /// Thrown when encountered a syntax error
13
14 class ParseException : Exception
15 {
16 mixin ExceptionWithPosition;
17 }
18
19 /// Parse a string and return its AST
20 /// Throws: ParseException, LexException, UnexpectedEOF
21
22 AST parseString(S, T...)(S str, T fn_ln_cn)
23 {
24 return parserFromString(str, fn_ln_cn).parse();
25 }
26
27 /// Parse the content of a file and return its AST
28 /// Throws: ParseException, LexException, UnexpectedEOF
29
30 AST parseFile(S, T...)(S filename, T ln_cn)
31 {
32 return parserFromFile(filename, ln_cn).parse();
33 }
34
35 // Named Constructors of Parser
36
37 private auto parserFromLexer(Lexer)(Lexer lex)
38 { return new Parser!Lexer(lex); }
39
40 private auto parserFromString(T...)(T params)
41 { return parserFromLexer(lexerFromString(params)); }
42
43 private auto parserFromFile(T...)(T params)
44 { return parserFromLexer(lexerFromFile(params)); }
45
46 // Parser
47
48 private class Parser(Lexer)
49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
50 {
51 AST parse()
52 {
53 auto e = Body();
54 if( !lex.empty )
55 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
56 return e;
57 }
58
59 AST Body()
60 {
61 /// Body ::= Declaration
62 /// | TopLevelExpression
63
64 if( closingBracket() )
65 return doNothingExpression();
66
67 auto saved = lex.save;
68 if( auto e = Declaration() )
69 return e;
70 lex = saved;
71 return TopLevelExpression();
72 }
73
74 AST Declaration() // returns null if it is not a declaration
75 {
76 /// Declaration ::=
77 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
78 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
79
80 auto pos = currentPosition();
81 string layer = "";
82
83 if( tryEat("@") )
84 {
85 layer = "@" ~ eatId("after @", AllowQuoted);
86 if( tryEat("(") )
87 return null; // @lay(...) expression, not a declaration
88 }
89
90 string kwd = layer;
91 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
92 return null; // none of {@lay, let, var, def} occurred, it's not a declaration
93
94 auto varpos = currentPosition();
95 string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
96
97 auto e = tryEat("(")
98 ? parseLambdaAfterOpenParen(varpos) // let var ( ...
99 : (eat("=", "after "~kwd), E(0)); // let var = ...
100
101 if( moreDeclarationExists() )
102 return new LetExpression(pos, var, layer, e, Body());
103 else
104 return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
105 }
106
107 AST TopLevelExpression()
108 {
109 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
110
111 auto pos = currentPosition();
112 auto e = E(0);
113 if( moreDeclarationExists() )
114 return new LetExpression(pos, "_", "", e, Body());
115 else
116 return e;
117 }
118
119 private bool moreDeclarationExists()
120 {
121 return (tryEat(";") || tryEat("in")) && !closingBracket();
122 }
123
124 private bool closingBracket()
125 {
126 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
127 }
128
129 // [TODO] make this customizable from program
130 private static string[][] operator_perferences = [
131 ["||"],
132 ["&&"],
133 ["!="],
134 ["=="],
135 ["<","<=",">",">="],
136 ["|"],
137 ["^"],
138 ["&"],
139 ["<<", ">>"],
140 ["+","-"],
141 ["~"],
142 ["*","/","%"],
143 ["^^","**"]
144 ];
145
146 AST E(size_t level)
147 {
148 /// Expression ::= (Binary left-associative operators over) Funcall
149
150 AST rec(AST lhs)
151 {
152 if( closingBracket() )
153 return lhs;
154
155 auto pos = currentPosition();
156 foreach(op; operator_perferences[level])
157 if( tryEat(op) )
158 return rec(
159 new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
160 return lhs;
161 }
162
163 if( operator_perferences.length <= level )
164 return Funcall();
165 else
166 return rec(E(level+1));
167 }
168
169 AST Funcall()
170 {
171 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
172
173 auto e = BaseExpression();
174 while( tryEat("(") )
175 {
176 auto pos = currentPosition();
177 AST[] args;
178 while( !tryEat(")") ) {
179 if( lex.empty )
180 throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
181 args ~= E(0);
182 if( !tryEat(",") ) {
183 eat(")", "after function parameters");
184 break;
185 }
186 }
187 e = new FuncallExpression(e.pos, e, args);
188 }
189 return e;
190 }
191
192 AST BaseExpression()
193 {
194 if( lex.empty )
195 throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
196
197 auto pos = lex.front.pos;
198 if( lex.front.quoted )
199 {
200 scope(exit) lex.popFront;
201 return new StrLiteral(pos, lex.front.str);
202 }
203 if( isNumber(lex.front.str) )
204 {
205 scope(exit) lex.popFront;
206 return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
207 }
208 if( tryEat("@") )
209 {
210 auto lay = "@"~eatId("for layer ID");
211 eat("(", "for layered execution");
212 auto e = Body();
213 eat(")", "after "~lay~"(...");
214 return new LayeredExpression(pos, lay, e);
215 }
216 if( tryEat("(") )
217 {
218 auto e = Body();
219 eat(")", "after parenthesized expression");
220 return e;
221 }
222 if( tryEat("if") )
223 {
224 eat("(", "after if");
225 auto cond = E(0);
226 eat(")", "after if condition");
227 auto thenPos = lex.front.pos;
228 eat("{", "after if condition");
229 auto th = Body();
230 eat("}", "after if-then body");
231 auto el = doNothingExpression();
232 auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
233 if( tryEat("else") ) {
234 eat("{", "after else");
235 el = Body();
236 eat("}", "after else body");
237 }
238 return new FuncallExpression(pos,
239 new VarExpression(pos, "if"),
240 cond,
241 new FunLiteral(thenPos, [], th),
242 new FunLiteral(elsePos, [], el)
243 );
244 }
245 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
246 {
247 eat("(", "after fun");
248 return parseLambdaAfterOpenParen(pos);
249 }
250 scope(exit) lex.popFront;
251 return new VarExpression(pos, lex.front.str);
252 }
253
254 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
255 {
256 Parameter[] params;
257 while( !tryEat(")") )
258 {
259 params ~= parseParam();
260 if( !tryEat(",") ) {
261 eat(")", "after function parameters");
262 break;
263 }
264 }
265 eat("{", "after function parameters");
266 auto funbody = Body();
267 eat("}", "after function body");
268 return new FunLiteral(pos, params, funbody);
269 }
270
271 Parameter parseParam()
272 {
273 string var;
274 string[] lay;
275 while( !closingBracket() && !lex.empty && lex.front.str!="," )
276 {
277 auto pos = currentPosition();
278 string p = eatId("for function parameter", AllowQuoted);
279 if( p == "@" )
280 lay ~= "@" ~ eatId("after @", AllowQuoted);
281 else if( var.empty )
282 var = p;
283 else
284 throw genex!ParseException(pos, "one parameter has two names");
285 }
286 return new Parameter(var, lay);
287 }
288
289 private:
290 Lexer lex;
291 this(Lexer lex) { this.lex = lex; }
292
293 bool isNumber(string s)
294 {
295 return find!(`a<'0' || '9'<a`)(s).empty;
296 }
297
298 void eat(string kwd, lazy string msg)
299 {
300 if( !tryEat(kwd) )
301 if( lex.empty )
302 throw genex!UnexpectedEOF(
303 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
304 else
305 throw genex!ParseException(
306 currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
307 }
308
309 bool tryEat(string kwd)
310 {
311 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
312 return false;
313 lex.popFront;
314 return true;
315 }
316
317 enum {AllowQuoted=true, DisallowQuoted=false};
318 string eatId(lazy string msg, bool aq=DisallowQuoted)
319 {
320 if( lex.empty )
321 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
322 if( !aq && lex.front.quoted )
323 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
324 scope(exit) lex.popFront;
325 return lex.front.str;
326 }
327
328 AST doNothingExpression()
329 {
330 return new IntLiteral(currentPosition(), BigInt(178));
331 }
332
333 immutable(LexPosition) currentPosition()
334 {
335 return lex.empty ? null : lex.front.pos;
336 }
337 }
338
339 unittest
340 {
341 mixin EasyAST;
342
343 assert_eq(parseString(`123`), intl(123));
344 assert_eq(parseString(`"foo"`), strl("foo"));
345 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
346 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
347 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
348 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
349 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
350 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
351 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
352 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
353 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
354 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
355 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
356 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
357 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
358 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
359 assert_eq(parseString(`if(1){}else{3}()()`),
360 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
361 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
362 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
363 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
364 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
365 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
366 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
367 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
368
369 assert_eq(parseString(`
370 let x = 100; #comment
371 let y = 200; #comment!!!!!
372 x+y
373 `),
374 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
375 );
376
377 assert_eq(parseString(`
378 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
379 fac(10)
380 `),
381 let("fac", "", fun(["x"],
382 call(var("if"),
383 call(var("<="), var("x"), intl(1)),
384 fun([], intl(1)),
385 fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
386 )),
387 call(var("fac"),intl(10))
388 )
389 );
390 }
391
392 unittest
393 {
394 assert_throw!UnexpectedEOF(parseString(`1+`));
395 assert_throw!ParseException(parseString(`1+2}`));
396 assert_throw!UnexpectedEOF(parseString(`let "x"`));
397 assert_throw!UnexpectedEOF(parseString(`var`));
398 assert_throw!ParseException(parseString(`@val x ==`));
399 assert_throw!ParseException(parseString(`if(){1}`));
400 assert_throw!UnexpectedEOF(parseString(`f(`));
401 }
402
403 unittest
404 {
405 mixin EasyAST;
406 assert_eq(parseString(`def foo(x) { x+1 }; foo`),
407 let("foo", "",
408 fun(["x"], call(var("+"), var("x"), intl(1))),
409 var("foo"))
410 );
411 }