Diff
Not logged in

Differences From Artifact [d3a7b70fa2f3b803]:

To Artifact [8212de2433d6e818]:


5 * Parser for Polemy programming language 5 * Parser for Polemy programming language 6 */ 6 */ 7 module polemy.parse; 7 module polemy.parse; 8 import polemy._common; 8 import polemy._common; 9 import polemy.lex; 9 import polemy.lex; 10 import polemy.ast; 10 import polemy.ast; 11 11 12 /// | 12 /// Thrown when encountered a syntax error > 13 13 class ParseException : Exception 14 class ParseException : Exception 14 { 15 { 15 mixin ExceptionWithPosition; 16 mixin ExceptionWithPosition; 16 } 17 } 17 18 18 /// Entry points of this module | 19 /// Parse a string and return its AST > 20 /// Throws: ParseException, LexException, UnexpectedEOF 19 21 20 AST parseString(S, T...)(S str, T fn_ln_cn) 22 AST parseString(S, T...)(S str, T fn_ln_cn) > 23 { 21 { return parserFromString(str, fn_ln_cn).parse(); } | 24 return parserFromString(str, fn_ln_cn).parse(); > 25 } 22 26 23 /// Entry points of this module | 27 /// Parse the content of a file and return its AST > 28 /// Throws: ParseException, LexException, UnexpectedEOF 24 29 25 AST parseFile(S, T...)(S filename, T ln_cn) 30 AST parseFile(S, T...)(S filename, T ln_cn) > 31 { 26 { return parserFromFile(filename, ln_cn).parse(); } | 32 return parserFromFile(filename, ln_cn).parse(); > 33 } 27 34 28 // Named Constructors of Parser 35 // Named Constructors of Parser 29 36 30 private auto parserFromLexer(Lexer)(Lexer lex) 37 private auto parserFromLexer(Lexer)(Lexer lex) 31 { return new Parser!Lexer(lex); } 38 { return new Parser!Lexer(lex); } 32 39 33 private auto parserFromString(T...)(T params) 40 private auto parserFromString(T...)(T params) 34 { return parserFromLexer(polemy.lex.lexerFromString(params)); } | 41 { return parserFromLexer(lexerFromString(params)); } 35 42 36 private auto parserFromFile(T...)(T params) 43 private auto parserFromFile(T...)(T params) 37 { return parserFromLexer(polemy.lex.lexerFromFile(params)); } | 44 { return parserFromLexer(lexerFromFile(params)); } 38 45 39 // Parser 46 // Parser 40 47 41 private class Parser(Lexer) 48 private class Parser(Lexer) 42 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) ) 49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) ) 43 { 50 { 44 AST parse() 51 AST parse() ................................................................................................................................................................................ 47 if( !lex.empty ) 54 if( !lex.empty ) 48 throw genex!ParseException(currentPosition(), "parsing e 55 throw genex!ParseException(currentPosition(), "parsing e 49 return e; 56 return e; 50 } 57 } 51 58 52 AST Body() 59 AST Body() 53 { 60 { 54 if( lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex. | 61 /// Body ::= Declaration > 62 /// | TopLevelExpression > 63 > 64 if( closingBracket() ) 55 return doNothingExpression(); 65 return doNothingExpression(); 56 66 57 auto saved = lex.save; 67 auto saved = lex.save; > 68 if( auto e = Declaration() ) > 69 return e; > 70 lex = saved; > 71 return TopLevelExpression(); > 72 } > 73 > 74 AST Declaration() // returns null if it is not a declaration > 75 { > 76 /// Declaration ::= > 77 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"i > 78 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" B > 79 58 auto pos = lex.front.pos; | 80 auto pos = currentPosition(); 59 string kwd = lex.front.str; < 60 if( tryEat("let") || tryEat("var") || tryEat("def") || tryEat("@ < > 81 string layer = ""; > 82 > 83 if( tryEat("@") ) 61 { 84 { 62 if( kwd == "@" ) { < 63 kwd ~= eatId("after @",true); | 85 layer = "@" ~ eatId("after @", AllowQuoted); 64 if( tryEat("(") ) { | 86 if( tryEat("(") ) 65 lex = saved; < 66 goto asExpression; < 67 } < 68 } < 69 immutable LexPosition varpos = (lex.empty ? null : lex.f < 70 string var = eatId("after "~kwd,true); < 71 // [TODO] refactor. only auto e = ... differ < 72 if(tryEat("(")) { < 73 kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, de < 74 auto e = parseLambdaAfterOpenParen(varpos); < 75 if( tryEat(";") && !lex.empty && (lex.front.quot < 76 return new LetExpression(pos, var, kwd, < 77 else < 78 return new LetExpression(pos, var, kwd, < 79 } else { < 80 eat("=", "after "~kwd); < 81 kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, de < 82 auto e = E(0); < 83 if( tryEat(";") && !lex.empty && (lex.front.quot < 84 return new LetExpression(pos, var, kwd, < 85 else < 86 return new LetExpression(pos, var, kwd, < 87 } < > 87 return null; // @lay(...) expression, not a decl 88 } 88 } > 89 > 90 string kwd = layer; > 91 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && ! > 92 return null; // none of {@lay, let, var, def} occurred, > 93 > 94 auto varpos = currentPosition(); > 95 string var = eatId("after "~kwd, AllowQuoted); // name of the de > 96 > 97 auto e = tryEat("(") > 98 ? parseLambdaAfterOpenParen(varpos) // let var ( ... > 99 : (eat("=", "after "~kwd), E(0)); // let var = ... > 100 > 101 if( moreDeclarationExists() ) > 102 return new LetExpression(pos, var, layer, e, Body()); > 103 else > 104 return new LetExpression(pos, var, layer, e, new VarExpr > 105 } > 106 > 107 AST TopLevelExpression() > 108 { > 109 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)? > 110 > 111 auto pos = currentPosition(); > 112 auto e = E(0); > 113 if( moreDeclarationExists() ) > 114 return new LetExpression(pos, "_", "", e, Body()); 89 else 115 else > 116 return e; > 117 } 90 { | 118 91 asExpression: < 92 auto e = E(0); < 93 if( tryEat(";") && !lex.empty && (lex.front.quoted || (l | 119 private bool moreDeclarationExists() 94 return new LetExpression(pos, "_", "", e, Body() < > 120 { 95 else | 121 return (tryEat(";") || tryEat("in")) && !closingBracket(); 96 return e; < 97 } | 122 } > 123 > 124 private bool closingBracket() > 125 { > 126 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(l 98 } 127 } 99 128 100 // [TODO] make customizable from program | 129 // [TODO] make this customizable from program 101 static immutable string[][] operator_perferences = [ | 130 private static string[][] operator_perferences = [ 102 ["||"], 131 ["||"], 103 ["&&"], 132 ["&&"], 104 ["!="], 133 ["!="], 105 ["=="], 134 ["=="], 106 ["<","<=",">",">="], 135 ["<","<=",">",">="], 107 ["|"], 136 ["|"], 108 ["^"], 137 ["^"], 109 ["&"], 138 ["&"], 110 ["<<", ">>"], 139 ["<<", ">>"], 111 ["+","-"], 140 ["+","-"], 112 ["~"], 141 ["~"], 113 ["*","/","%"], 142 ["*","/","%"], 114 ["^^"] | 143 ["^^","**"] 115 ]; 144 ]; 116 145 117 AST E(int level) | 146 AST E(size_t level) 118 { 147 { > 148 /// Expression ::= (Binary left-associative operators over) Func > 149 > 150 AST rec(AST lhs) > 151 { > 152 if( closingBracket() ) > 153 return lhs; > 154 > 155 auto pos = currentPosition(); > 156 foreach(op; operator_perferences[level]) > 157 if( tryEat(op) ) > 158 return rec( > 159 new FuncallExpression(lhs.pos, n > 160 return lhs; > 161 } > 162 119 if( operator_perferences.length <= level ) 163 if( operator_perferences.length <= level ) 120 return Funcall(); 164 return Funcall(); 121 else 165 else 122 { < 123 auto ops = operator_perferences[level]; < 124 auto e = E(level+1); | 166 return rec(E(level+1)); 125 seq: < 126 while( !lex.empty ) < 127 { < 128 auto pos = lex.front.pos; < 129 foreach(op; ops) < 130 if( tryEat(op) ) < 131 { < 132 e = new FuncallExpression(e.pos, < 133 continue seq; < 134 } < 135 break; < 136 } < 137 return e; < 138 } < 139 } 167 } 140 168 141 AST Funcall() 169 AST Funcall() 142 { 170 { > 171 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]* > 172 143 auto e = BaseExpression(); 173 auto e = BaseExpression(); 144 while( tryEat("(") ) 174 while( tryEat("(") ) 145 { 175 { 146 auto pos = currentPosition(); 176 auto pos = currentPosition(); 147 AST[] args; 177 AST[] args; 148 while( !tryEat(")") ) { 178 while( !tryEat(")") ) { 149 if( lex.empty ) 179 if( lex.empty ) 150 throw genex!UnexpectedEOF(pos,"Closing ' | 180 throw genex!UnexpectedEOF(pos, "closing 151 args ~= E(0); 181 args ~= E(0); 152 if( !tryEat(",") ) { 182 if( !tryEat(",") ) { 153 eat(")", "after function parameters"); 183 eat(")", "after function parameters"); 154 break; 184 break; 155 } 185 } 156 } 186 } 157 e = new FuncallExpression(e.pos, e, args); 187 e = new FuncallExpression(e.pos, e, args); ................................................................................................................................................................................ 208 return new FuncallExpression(pos, 238 return new FuncallExpression(pos, 209 new VarExpression(pos, "if"), 239 new VarExpression(pos, "if"), 210 cond, 240 cond, 211 new FunLiteral(thenPos, [], th), 241 new FunLiteral(thenPos, [], th), 212 new FunLiteral(elsePos, [], el) 242 new FunLiteral(elsePos, [], el) 213 ); 243 ); 214 } 244 } 215 if( tryEat("fun") || tryEat("\u03BB") ) | 245 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!! 216 { 246 { 217 eat("(", "after fun"); 247 eat("(", "after fun"); 218 return parseLambdaAfterOpenParen(pos); 248 return parseLambdaAfterOpenParen(pos); 219 } 249 } 220 scope(exit) lex.popFront; 250 scope(exit) lex.popFront; 221 return new VarExpression(pos, lex.front.str); 251 return new VarExpression(pos, lex.front.str); 222 } 252 } 223 253 224 AST parseLambdaAfterOpenParen(immutable LexPosition pos) 254 AST parseLambdaAfterOpenParen(immutable LexPosition pos) 225 { 255 { 226 Parameter[] params; 256 Parameter[] params; 227 while( !tryEat(")") ) 257 while( !tryEat(")") ) 228 { 258 { 229 params ~= new Parameter(eatId("for function parameter"), | 259 params ~= parseParam(); 230 if( !tryEat(",") ) { 260 if( !tryEat(",") ) { 231 eat(")", "after function parameters"); 261 eat(")", "after function parameters"); 232 break; 262 break; 233 } 263 } 234 } 264 } 235 eat("{", "after function parameters"); 265 eat("{", "after function parameters"); 236 auto funbody = Body(); 266 auto funbody = Body(); 237 eat("}", "after function body"); 267 eat("}", "after function body"); 238 return new FunLiteral(pos, params, funbody); 268 return new FunLiteral(pos, params, funbody); 239 } 269 } > 270 > 271 Parameter parseParam() > 272 { > 273 string var; > 274 string[] lay; > 275 while( !closingBracket() && !lex.empty && lex.front.str!="," ) > 276 { > 277 auto pos = currentPosition(); > 278 string p = eatId("for function parameter", AllowQuoted); > 279 if( p == "@" ) > 280 lay ~= "@" ~ eatId("after @", AllowQuoted); > 281 else if( var.empty ) > 282 var = p; > 283 else > 284 throw genex!ParseException(pos, "one parameter h > 285 } > 286 return new Parameter(var, lay); > 287 } 240 288 241 private: 289 private: 242 Lexer lex; 290 Lexer lex; 243 this(Lexer lex) { this.lex = lex; } 291 this(Lexer lex) { this.lex = lex; } > 292 > 293 bool isNumber(string s) > 294 { > 295 return find!(`a<'0' || '9'<a`)(s).empty; > 296 } 244 297 245 void eat(string kwd, lazy string msg) 298 void eat(string kwd, lazy string msg) 246 { 299 { 247 if( !tryEat(kwd) ) 300 if( !tryEat(kwd) ) 248 if( lex.empty ) 301 if( lex.empty ) 249 throw genex!UnexpectedEOF( 302 throw genex!UnexpectedEOF( 250 currentPosition(), sprintf!"%s is expect 303 currentPosition(), sprintf!"%s is expect ................................................................................................................................................................................ 257 { 310 { 258 if( lex.empty || lex.front.quoted || lex.front.str!=kwd ) 311 if( lex.empty || lex.front.quoted || lex.front.str!=kwd ) 259 return false; 312 return false; 260 lex.popFront; 313 lex.popFront; 261 return true; 314 return true; 262 } 315 } 263 316 > 317 enum {AllowQuoted=true, DisallowQuoted=false}; 264 string eatId(lazy string msg, bool allowQuoted=false) | 318 string eatId(lazy string msg, bool aq=DisallowQuoted) 265 { 319 { 266 if( lex.empty ) 320 if( lex.empty ) 267 throw genex!UnexpectedEOF(currentPosition(), "identifier 321 throw genex!UnexpectedEOF(currentPosition(), "identifier 268 if( !allowQuoted && lex.front.quoted ) | 322 if( !aq && lex.front.quoted ) 269 throw genex!ParseException(currentPosition(), "identifie 323 throw genex!ParseException(currentPosition(), "identifie 270 scope(exit) lex.popFront; 324 scope(exit) lex.popFront; 271 return lex.front.str; 325 return lex.front.str; 272 } 326 } 273 327 274 bool isNumber(string s) < 275 { < 276 return find!(`a<'0'||'9'<a`)(s).empty; < 277 } < 278 < 279 AST doNothingExpression() 328 AST doNothingExpression() 280 { 329 { 281 return new IntLiteral(currentPosition(), BigInt(178)); 330 return new IntLiteral(currentPosition(), BigInt(178)); 282 } 331 } 283 332 284 immutable(LexPosition) currentPosition() 333 immutable(LexPosition) currentPosition() 285 { 334 { ................................................................................................................................................................................ 295 assert_eq(parseString(`"foo"`), strl("foo")); 344 assert_eq(parseString(`"foo"`), strl("foo")); 296 assert_eq(parseString(`fun(){1}`), fun([],intl(1))); 345 assert_eq(parseString(`fun(){1}`), fun([],intl(1))); 297 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1))); 346 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1))); 298 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1))); 347 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1))); 299 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1))); 348 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1))); 300 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2))); 349 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2))); 301 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2))); 350 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2))); 302 assert_eq(parseString(`let x=1;2`), let("x","",intl(1),intl(2))); | 351 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2))); 303 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2))); 352 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2))); 304 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x"))); 353 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x"))); 305 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x"))); 354 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x"))); 306 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var 355 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var 307 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2))); 356 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2))); 308 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2) 357 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2) 309 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([], 358 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([], ................................................................................................................................................................................ 310 assert_eq(parseString(`if(1){}else{3}()()`), 359 assert_eq(parseString(`if(1){}else{3}()()`), 311 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3 360 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3 312 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl 361 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl 313 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),in 362 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),in 314 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),in 363 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),in 315 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl 364 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl 316 assert_eq(parseString(`@x(1)`), lay("@x", intl(1))); 365 assert_eq(parseString(`@x(1)`), lay("@x", intl(1))); > 366 assert_eq(parseString(`fun(x @v @t, y, z @t){}`), > 367 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], 317 368 318 assert_eq(parseString(` 369 assert_eq(parseString(` 319 let x = 100; #comment 370 let x = 100; #comment 320 let y = 200; #comment!!!!! 371 let y = 200; #comment!!!!! 321 x+y 372 x+y 322 `), 373 `), 323 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), v 374 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), v