Artifact Content
Not logged in

Artifact 6bec0997c5764737c9398c381cd6556587936c28


     1  /**
     2   * Authors: k.inaba
     3   * License: NYSL 0.9982 http://www.kmonos.net/nysl/
     4   *
     5   * Parser for Polemy programming language
     6   */
     7  module polemy.parse;
     8  import polemy._common;
     9  import polemy.lex;
    10  import polemy.ast;
    11  
    12  /// Thrown when encountered a syntax error
    13  
    14  class ParseException : Exception
    15  {
    16  	mixin ExceptionWithPosition;
    17  }
    18  
    19  /// Parse a string and return its AST
    20  /// Throws: ParseException, LexException, UnexpectedEOF
    21  
    22  AST parseString(S, T...)(S str, T fn_ln_cn)
    23  {
    24  	return parserFromString(str, fn_ln_cn).parse();
    25  }
    26  
    27  /// Parse the content of a file and return its AST
    28  /// Throws: ParseException, LexException, UnexpectedEOF
    29  
    30  AST parseFile(S, T...)(S filename, T ln_cn)
    31  {
    32  	return parserFromFile(filename, ln_cn).parse();
    33  }
    34  
    35  // Named Constructors of Parser
    36  
    37  private auto parserFromLexer(Lexer)(Lexer lex)
    38  	{ return new Parser!Lexer(lex); }
    39  
    40  private auto parserFromString(T...)(T params)
    41  	{ return parserFromLexer(lexerFromString(params)); }
    42  
    43  private auto parserFromFile(T...)(T params)
    44  	{ return parserFromLexer(lexerFromFile(params)); }
    45  
    46  // Parser
    47  
    48  private class Parser(Lexer)
    49  	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
    50  {
    51  	AST parse()
    52  	{
    53  		auto e = Body();
    54  		if( !lex.empty )
    55  			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
    56  		return e;
    57  	}
    58  
    59  	AST Body()
    60  	{
    61  		/// Body ::= Declaration
    62  		///        | TopLevelExpression
    63  
    64  		if( closingBracket() )
    65  			return doNothingExpression();
    66  
    67  		auto saved = lex.save;
    68  		if( auto e = Declaration() )
    69  			return e;
    70  		lex = saved;
    71  		return TopLevelExpression();
    72  	}
    73  
    74  	AST Declaration() // returns null if it is not a declaration
    75  	{
    76  		/// Declaration ::=
    77  		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
    78  		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    79  		///  | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
    80  		///  | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    81  
    82  		auto pos = currentPosition();
    83  		string layer = "";
    84  		bool layerRiseDecl = false;
    85  
    86  		if( tryEat("@") )
    87  		{
    88  			layer = "@" ~ eatId("after @", AllowQuoted);
    89  			if( layer == "@@" )
    90  			{
    91  				layer = "@" ~ eatId("after @@", AllowQuoted);
    92  				layerRiseDecl = true;
    93  			}
    94  			else
    95  			{
    96  				if( tryEat("(") )
    97  					return null; // @lay(...) expression, not a declaration
    98  			}
    99  		}
   100  
   101  		// [TODO] Refactor
   102  		if( layerRiseDecl )
   103  		{
   104  			string kwd = "@" ~ layer;
   105  			string var = layer;
   106  
   107  			auto e = tryEat("(")
   108  				? parseLambdaAfterOpenParen(pos)  // let var ( ...
   109  				: (eat("=", "after "~kwd), E(0)); // let var = ...
   110  			if( moreDeclarationExists() )
   111  				return new LetExpression(pos, var, "(system)", e, Body());
   112  			else
   113  				return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
   114  		}
   115  		else
   116  		{
   117  			string kwd = layer;
   118  			if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
   119  				return null; // none of {@lay, let, var, def} occurred, it's not a declaration
   120  
   121  			auto varpos = currentPosition();
   122  			string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
   123  
   124  			auto e = tryEat("(")
   125  				? parseLambdaAfterOpenParen(varpos)  // let var ( ...
   126  				: (eat("=", "after "~kwd), E(0));    // let var = ...
   127  			if( moreDeclarationExists() )
   128  				return new LetExpression(pos, var, layer, e, Body());
   129  			else
   130  				return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
   131  		}
   132  	}
   133  
   134  	AST TopLevelExpression()
   135  	{
   136  		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
   137  
   138  		auto pos = currentPosition();
   139  		auto e = E(0);
   140  		if( moreDeclarationExists() )
   141  			return new LetExpression(pos, "_", "", e, Body());
   142  		else
   143  			return e;
   144  	}
   145  
   146  	private bool moreDeclarationExists()
   147  	{
   148  		return (tryEat(";") || tryEat("in")) && !closingBracket();
   149  	}
   150  
   151  	private bool closingBracket()
   152  	{
   153  		return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
   154  	}
   155  
   156  	// [TODO] make this customizable from program
   157  	private static string[][] operator_perferences = [
   158  		["||"],
   159  		["&&"],
   160  		["!="],
   161  		["=="],
   162  		["<","<=",">",">="],
   163  		["|"],
   164  		["^"],
   165  		["&"],
   166  		["<<", ">>"],
   167  		["+","-"],
   168  		["~"],
   169  		["*","/","%"],
   170  		["^^","**"],
   171  		[".",".?"]
   172  	];
   173  
   174  	AST E(size_t level)
   175  	{
   176  		/// Expression ::= (Binary left-associative operators over) Funcall
   177  
   178  		AST rec(AST lhs)
   179  		{
   180  			if( closingBracket() )
   181  				return lhs;
   182  
   183  			auto pos = currentPosition();
   184  			foreach(op; operator_perferences[level])
   185  				if( tryEat(op) )
   186  					if( op[0]=='.' )
   187  						return rec(
   188  							new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, parseId()));
   189  					else
   190  					return rec(
   191  						new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
   192  			return lhs;
   193  		}
   194  
   195  		if( operator_perferences.length <= level )
   196  			return Funcall();
   197  		else
   198  			return rec(E(level+1));
   199  	}
   200  
   201  	AST Funcall()
   202  	{
   203  		/// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
   204  
   205  		auto e = BaseExpression();
   206  		while( tryEat("(") )
   207  		{
   208  			auto pos = currentPosition();
   209  			AST[] args;
   210  			while( !tryEat(")") ) {
   211  				if( lex.empty )
   212  					throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
   213  				args ~= E(0);
   214  				if( !tryEat(",") ) {
   215  					eat(")", "after function parameters");
   216  					break;
   217  				}
   218  			}
   219  			e = new FuncallExpression(e.pos, e, args);
   220  		}
   221  		return e;
   222  	}
   223  
   224  	AST BaseExpression()
   225  	{
   226  		if( lex.empty )
   227  			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
   228  
   229  		auto pos = lex.front.pos;
   230  		if( lex.front.quoted )
   231  		{
   232  			scope(exit) lex.popFront;
   233  			return new StrLiteral(pos, lex.front.str);
   234  		}
   235  		if( isNumber(lex.front.str) )
   236  		{
   237  			scope(exit) lex.popFront;
   238  			return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
   239  		}
   240  		if( tryEat("@") )
   241  		{
   242  			auto lay = "@"~eatId("for layer ID");
   243  			eat("(", "for layered execution");
   244  			auto e = Body();
   245  			eat(")", "after "~lay~"(...");
   246  			return new LayeredExpression(pos, lay, e);
   247  		}
   248  		if( tryEat("(") )
   249  		{
   250  			auto e = Body();
   251  			eat(")", "after parenthesized expression");
   252  			return e;
   253  		}
   254  		if( tryEat("{") )
   255  		{
   256  			AST e = new FuncallExpression(pos, new VarExpression(pos,"{}"));
   257  			if( tryEat("}") )
   258  				return e;
   259  			for(;;)
   260  			{
   261  				string key = eatId("for table key", AllowQuoted);
   262  				eat(":", "after table key");
   263  				AST val = E(0);
   264  				e = new FuncallExpression(pos, new VarExpression(pos,".="),
   265  						e, new StrLiteral(pos,key), val);
   266  				if( !tryEat(",") )
   267  				{
   268  					eat("}", "for the end of table literal");
   269  					break;
   270  				}
   271  			}
   272  			return e;
   273  		}
   274  		if( tryEat("if") )
   275  		{
   276  			eat("(", "after if");
   277  			auto cond = E(0);
   278  			eat(")", "after if condition");
   279  			auto thenPos = lex.front.pos;
   280  			eat("{", "after if condition");
   281  			auto th = Body();
   282  			eat("}", "after if-then body");
   283  			auto el = doNothingExpression();
   284  			auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
   285  			if( tryEat("else") ) {
   286  				eat("{", "after else");
   287  				el = Body();
   288  				eat("}", "after else body");
   289  			}
   290  			return new FuncallExpression(pos, 
   291  				new VarExpression(pos, "if"),
   292  				cond,
   293  				new FunLiteral(thenPos, [], th),
   294  				new FunLiteral(elsePos, [], el)
   295  			);
   296  		}
   297  		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
   298  		{
   299  			eat("(", "after fun");
   300  			return parseLambdaAfterOpenParen(pos);
   301  		}
   302  		scope(exit) lex.popFront;
   303  		return new VarExpression(pos, lex.front.str);
   304  	}
   305  
   306  	AST parseId()
   307  	{
   308  		scope(exit) lex.popFront;
   309  		return new StrLiteral(currentPosition(), lex.front.str);
   310  	}
   311  
   312  	AST parseLambdaAfterOpenParen(immutable LexPosition pos)
   313  	{
   314  		Parameter[] params;
   315  		while( !tryEat(")") )
   316  		{
   317  			params ~= parseParam();
   318  			if( !tryEat(",") ) {
   319  				eat(")", "after function parameters");
   320  				break;
   321  			}
   322  		}
   323  		eat("{", "after function parameters");
   324  		auto funbody = Body();
   325  		eat("}", "after function body");
   326  		return new FunLiteral(pos, params, funbody);
   327  	}
   328  
   329  	Parameter parseParam()
   330  	{
   331  		string var;
   332  		string[] lay;
   333  		while( !closingBracket() && !lex.empty && lex.front.str!="," )
   334  		{
   335  			auto pos = currentPosition();
   336  			string p = eatId("for function parameter", AllowQuoted);
   337  			if( p == "@" )
   338  				lay ~= "@" ~ eatId("after @", AllowQuoted);
   339  			else if( var.empty )
   340  				var = p;
   341  			else
   342  				throw genex!ParseException(pos, "one parameter has two names");
   343  		}
   344  		return new Parameter(var, lay);
   345  	}
   346  
   347  private:
   348  	Lexer lex;
   349  	this(Lexer lex) { this.lex = lex; }
   350  
   351  	bool isNumber(string s)
   352  	{
   353  		return find!(`a<'0' || '9'<a`)(s).empty;
   354  	}
   355  	
   356  	void eat(string kwd, lazy string msg)
   357  	{
   358  		if( !tryEat(kwd) )
   359  			if( lex.empty )
   360  				throw genex!UnexpectedEOF(
   361  					currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
   362  			else
   363  				throw genex!ParseException(
   364  					currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
   365  	}
   366  
   367  	bool tryEat(string kwd)
   368  	{
   369  		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
   370  			return false;
   371  		lex.popFront;
   372  		return true;
   373  	}
   374  
   375  	enum {AllowQuoted=true, DisallowQuoted=false};
   376  	string eatId(lazy string msg, bool aq=DisallowQuoted)
   377  	{
   378  		if( lex.empty )
   379  			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
   380  		if( !aq && lex.front.quoted )
   381  			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
   382  		scope(exit) lex.popFront;
   383  		return lex.front.str;
   384  	}
   385  
   386  	AST doNothingExpression()
   387  	{
   388  		return new IntLiteral(currentPosition(), BigInt(178));
   389  	}
   390  
   391  	immutable(LexPosition) currentPosition()
   392  	{
   393  		return lex.empty ? null : lex.front.pos;
   394  	}
   395  }
   396  
   397  unittest
   398  {
   399  	mixin EasyAST;
   400  
   401  	assert_eq(parseString(`123`), intl(123));
   402  	assert_eq(parseString(`"foo"`), strl("foo"));
   403  	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
   404  	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
   405  	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
   406  	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
   407  	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
   408  	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
   409  	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
   410  	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
   411  	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
   412  	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
   413  	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
   414  	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
   415  	assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
   416  	assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
   417  	assert_eq(parseString(`if(1){}else{3}()()`),
   418  		call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
   419  	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
   420  	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
   421  	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
   422  	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
   423  	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
   424  	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
   425  		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
   426  
   427  	assert_eq(parseString(`
   428  		let x = 100; #comment
   429  		let y = 200; #comment!!!!!
   430  			x+y
   431  	`),
   432  		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
   433  	);
   434  
   435  	assert_eq(parseString(`
   436  		var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
   437  		fac(10)
   438  	`),
   439  		let("fac", "", fun(["x"],
   440  			call(var("if"),
   441  				call(var("<="), var("x"), intl(1)),
   442  				fun([], intl(1)),
   443  				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
   444  			)),
   445  			call(var("fac"),intl(10))
   446  		)
   447  	);
   448  }
   449  
   450  unittest
   451  {
   452  	assert_throw!UnexpectedEOF(parseString(`1+`));
   453  	assert_throw!ParseException(parseString(`1+2}`));
   454  	assert_throw!UnexpectedEOF(parseString(`let "x"`));
   455  	assert_throw!UnexpectedEOF(parseString(`var`));
   456  	assert_throw!ParseException(parseString(`@val x ==`));
   457  	assert_throw!ParseException(parseString(`if(){1}`));
   458  	assert_throw!UnexpectedEOF(parseString(`f(`));
   459  }
   460  
   461  unittest
   462  {
   463  	mixin EasyAST;
   464  	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
   465  		let("foo", "",
   466  			fun(["x"], call(var("+"), var("x"), intl(1))),
   467  			var("foo"))
   468  	);
   469  
   470  	assert_eq(parseString(`@@type ( x ) { x }`),
   471  		let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
   472  
   473  	assert_eq(parseString(`{}`), call(var("{}")));
   474  	assert_eq(parseString(`{foo:1,"bar":2}`),
   475  		call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
   476  	assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
   477  	assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
   478  }