Artifact Content
Not logged in

Artifact 25ec86a59533b939ec72435f0a8e4b97b6209f4e


     1  /**
     2   * Authors: k.inaba
     3   * License: NYSL 0.9982 http://www.kmonos.net/nysl/
     4   *
     5   * Parser for Polemy programming language
     6   */
     7  module polemy.parse;
     8  import polemy._common;
     9  import polemy.failure;
    10  import polemy.lex;
    11  import polemy.ast;
    12  
    13  /// Parse a string and return its AST
    14  /// Throws: ParseException, LexException, UnexpectedEOF
    15  
    16  AST parseString(S, T...)(S str, T fn_ln_cn)
    17  {
    18  	return parserFromString(str, fn_ln_cn).parse();
    19  }
    20  
    21  /// Parse the content of a file and return its AST
    22  /// Throws: ParseException, LexException, UnexpectedEOF
    23  
    24  AST parseFile(S, T...)(S filename, T ln_cn)
    25  {
    26  	return parserFromFile(filename, ln_cn).parse();
    27  }
    28  
    29  // Named Constructors of Parser
    30  
    31  private auto parserFromLexer(Lexer)(Lexer lex)
    32  	{ return new Parser!Lexer(lex); }
    33  
    34  private auto parserFromString(T...)(T params)
    35  	{ return parserFromLexer(lexerFromString(params)); }
    36  
    37  private auto parserFromFile(T...)(T params)
    38  	{ return parserFromLexer(lexerFromFile(params)); }
    39  
    40  // Parser
    41  
    42  private class Parser(Lexer)
    43  	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
    44  {
    45  	AST parse()
    46  	{
    47  		auto e = Body();
    48  		if( !lex.empty )
    49  			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
    50  		return e;
    51  	}
    52  
    53  	AST Body()
    54  	{
    55  		/// Body ::= Declaration
    56  		///        | TopLevelExpression
    57  
    58  		if( closingBracket() )
    59  			return doNothingExpression();
    60  
    61  		auto saved = lex.save;
    62  		if( auto e = Declaration() )
    63  			return e;
    64  		lex = saved;
    65  		return TopLevelExpression();
    66  	}
    67  
    68  	AST Declaration() // returns null if it is not a declaration
    69  	{
    70  		/// Declaration ::=
    71  		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
    72  		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    73  		///  | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
    74  		///  | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    75  
    76  		auto pos = currentPosition();
    77  		string layer = "";
    78  		bool layerRiseDecl = false;
    79  
    80  		if( tryEat("@") )
    81  		{
    82  			layer = "@" ~ eatId("after @", AllowQuoted);
    83  			if( layer == "@@" )
    84  			{
    85  				layer = "@" ~ eatId("after @@", AllowQuoted);
    86  				layerRiseDecl = true;
    87  			}
    88  			else
    89  			{
    90  				if( tryEat("(") )
    91  					return null; // @lay(...) expression, not a declaration
    92  			}
    93  		}
    94  
    95  		// [TODO] Refactor
    96  		if( layerRiseDecl )
    97  		{
    98  			string kwd = "@" ~ layer;
    99  			string var = layer;
   100  
   101  			auto e = tryEat("(")
   102  				? parseLambdaAfterOpenParen(pos)  // let var ( ...
   103  				: (eat("=", "after "~kwd), E(0)); // let var = ...
   104  			if( moreDeclarationExists() )
   105  				return new LetExpression(pos, var, "(system)", e, Body());
   106  			else
   107  				return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
   108  		}
   109  		else
   110  		{
   111  			string kwd = layer;
   112  			if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
   113  				return null; // none of {@lay, let, var, def} occurred, it's not a declaration
   114  
   115  			auto varpos = currentPosition();
   116  			string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
   117  
   118  			auto e = tryEat("(")
   119  				? parseLambdaAfterOpenParen(varpos)  // let var ( ...
   120  				: (eat("=", "after "~kwd), E(0));    // let var = ...
   121  			if( moreDeclarationExists() )
   122  				return new LetExpression(pos, var, layer, e, Body());
   123  			else
   124  				return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
   125  		}
   126  	}
   127  
   128  	AST TopLevelExpression()
   129  	{
   130  		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
   131  
   132  		auto pos = currentPosition();
   133  		auto e = E(0);
   134  		if( moreDeclarationExists() )
   135  			return new LetExpression(pos, "_", "", e, Body());
   136  		else
   137  			return e;
   138  	}
   139  
   140  	private bool moreDeclarationExists()
   141  	{
   142  		return (tryEat(";") || tryEat("in")) && !closingBracket();
   143  	}
   144  
   145  	private bool closingBracket()
   146  	{
   147  		return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
   148  	}
   149  
   150  	// [TODO] make this customizable from program
   151  	private static string[][] operator_perferences = [
   152  		["||"],
   153  		["&&"],
   154  		["!="],
   155  		["=="],
   156  		["<","<=",">",">="],
   157  		["|"],
   158  		["^"],
   159  		["&"],
   160  		["<<", ">>"],
   161  		["+","-"],
   162  		["~"],
   163  		["*","/","%"],
   164  		["^^","**"],
   165  		[".",".?"]
   166  	];
   167  
   168  	AST E(size_t level)
   169  	{
   170  		/// Expression ::= (Binary left-associative operators over) Funcall
   171  
   172  		AST rec(AST lhs)
   173  		{
   174  			if( closingBracket() )
   175  				return lhs;
   176  
   177  			auto pos = currentPosition();
   178  			foreach(op; operator_perferences[level])
   179  				if( tryEat(op) )
   180  					if( op[0]=='.' )
   181  						return rec(
   182  							new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, parseId()));
   183  					else
   184  					return rec(
   185  						new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
   186  			return lhs;
   187  		}
   188  
   189  		if( operator_perferences.length <= level )
   190  			return Funcall();
   191  		else
   192  			return rec(E(level+1));
   193  	}
   194  
   195  	AST Funcall()
   196  	{
   197  		/// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
   198  
   199  		auto e = BaseExpression();
   200  		for(;;)
   201  			if( tryEat("(") )
   202  			{
   203  				auto pos = currentPosition();
   204  				AST[] args;
   205  				while( !tryEat(")") ) {
   206  					if( lex.empty )
   207  						throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
   208  					args ~= E(0);
   209  					if( !tryEat(",") ) {
   210  						eat(")", "after function parameters");
   211  						break;
   212  					}
   213  				}
   214  				e = new FuncallExpression(e.pos, e, args);
   215  			}
   216  			else if( tryEat("{") )
   217  			{
   218  				e = parseTableSetAfterBrace(e);
   219  			}
   220  			else
   221  				break;
   222  		return e;
   223  	}
   224  
   225  	AST parseTableSetAfterBrace(AST e)
   226  	{
   227  		if( tryEat("}") )
   228  			return e;
   229  		auto pos = currentPosition();
   230  		for(;;)
   231  		{
   232  			string key = eatId("for table key", AllowQuoted);
   233  			eat(":", "after table key");
   234  			AST val = E(0);
   235  			e = new FuncallExpression(pos, new VarExpression(pos,".="),
   236  					e, new StrLiteral(pos,key), val);
   237  			if( !tryEat(",") )
   238  			{
   239  				eat("}", "for the end of table literal");
   240  				break;
   241  			}
   242  		}
   243  		return e;
   244  	}
   245  
   246  	AST BaseExpression()
   247  	{
   248  		if( lex.empty )
   249  			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
   250  
   251  		auto pos = lex.front.pos;
   252  		if( lex.front.quoted )
   253  		{
   254  			scope(exit) lex.popFront;
   255  			return new StrLiteral(pos, lex.front.str);
   256  		}
   257  		if( isNumber(lex.front.str) )
   258  		{
   259  			scope(exit) lex.popFront;
   260  			return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
   261  		}
   262  		if( tryEat("@") )
   263  		{
   264  			auto lay = "@"~eatId("for layer ID");
   265  			eat("(", "for layered execution");
   266  			auto e = Body();
   267  			eat(")", "after "~lay~"(...");
   268  			return new LayeredExpression(pos, lay, e);
   269  		}
   270  		if( tryEat("(") )
   271  		{
   272  			auto e = Body();
   273  			eat(")", "after parenthesized expression");
   274  			return e;
   275  		}
   276  		if( tryEat("{") )
   277  		{
   278  			AST e = new FuncallExpression(pos, new VarExpression(pos,"{}"));
   279  			return parseTableSetAfterBrace(e);
   280  		}
   281  		if( tryEat("if") )
   282  		{
   283  			eat("(", "after if");
   284  			auto cond = E(0);
   285  			eat(")", "after if condition");
   286  			auto thenPos = lex.front.pos;
   287  			eat("{", "after if condition");
   288  			auto th = Body();
   289  			eat("}", "after if-then body");
   290  			auto el = doNothingExpression();
   291  			auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
   292  			if( tryEat("else") ) {
   293  				eat("{", "after else");
   294  				el = Body();
   295  				eat("}", "after else body");
   296  			}
   297  			return new FuncallExpression(pos, 
   298  				new VarExpression(pos, "if"),
   299  				cond,
   300  				new FunLiteral(thenPos, [], th),
   301  				new FunLiteral(elsePos, [], el)
   302  			);
   303  		}
   304  		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
   305  		{
   306  			eat("(", "after fun");
   307  			return parseLambdaAfterOpenParen(pos);
   308  		}
   309  		scope(exit) lex.popFront;
   310  		return new VarExpression(pos, lex.front.str);
   311  	}
   312  
   313  	AST parseId()
   314  	{
   315  		scope(exit) lex.popFront;
   316  		return new StrLiteral(currentPosition(), lex.front.str);
   317  	}
   318  
   319  	AST parseLambdaAfterOpenParen(immutable LexPosition pos)
   320  	{
   321  		Parameter[] params;
   322  		while( !tryEat(")") )
   323  		{
   324  			params ~= parseParam();
   325  			if( !tryEat(",") ) {
   326  				eat(")", "after function parameters");
   327  				break;
   328  			}
   329  		}
   330  		eat("{", "after function parameters");
   331  		auto funbody = Body();
   332  		eat("}", "after function body");
   333  		return new FunLiteral(pos, params, funbody);
   334  	}
   335  
   336  	Parameter parseParam()
   337  	{
   338  		string var;
   339  		string[] lay;
   340  		while( !closingBracket() && !lex.empty && lex.front.str!="," )
   341  		{
   342  			auto pos = currentPosition();
   343  			string p = eatId("for function parameter", AllowQuoted);
   344  			if( p == "@" )
   345  				lay ~= "@" ~ eatId("after @", AllowQuoted);
   346  			else if( var.empty )
   347  				var = p;
   348  			else
   349  				throw genex!ParseException(pos, "one parameter has two names");
   350  		}
   351  		return new Parameter(var, lay);
   352  	}
   353  
   354  private:
   355  	Lexer lex;
   356  	this(Lexer lex) { this.lex = lex; }
   357  
   358  	bool isNumber(string s)
   359  	{
   360  		return find!(`a<'0' || '9'<a`)(s).empty;
   361  	}
   362  	
   363  	void eat(string kwd, lazy string msg)
   364  	{
   365  		if( !tryEat(kwd) )
   366  			if( lex.empty )
   367  				throw genex!UnexpectedEOF(
   368  					currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
   369  			else
   370  				throw genex!ParseException(
   371  					currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
   372  	}
   373  
   374  	bool tryEat(string kwd)
   375  	{
   376  		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
   377  			return false;
   378  		lex.popFront;
   379  		return true;
   380  	}
   381  
   382  	enum {AllowQuoted=true, DisallowQuoted=false};
   383  	string eatId(lazy string msg, bool aq=DisallowQuoted)
   384  	{
   385  		if( lex.empty )
   386  			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
   387  		if( !aq && lex.front.quoted )
   388  			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
   389  		scope(exit) lex.popFront;
   390  		return lex.front.str;
   391  	}
   392  
   393  	AST doNothingExpression()
   394  	{
   395  		return new IntLiteral(currentPosition(), BigInt(178));
   396  	}
   397  
   398  	immutable(LexPosition) currentPosition()
   399  	{
   400  		return lex.empty ? null : lex.front.pos;
   401  	}
   402  }
   403  
   404  unittest
   405  {
   406  	mixin EasyAST;
   407  
   408  	assert_eq(parseString(`123`), intl(123));
   409  	assert_eq(parseString(`"foo"`), strl("foo"));
   410  	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
   411  	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
   412  	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
   413  	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
   414  	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
   415  	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
   416  	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
   417  	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
   418  	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
   419  	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
   420  	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
   421  	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
   422  	assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
   423  	assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
   424  	assert_eq(parseString(`if(1){}else{3}()()`),
   425  		call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
   426  	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
   427  	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
   428  	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
   429  	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
   430  	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
   431  	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
   432  		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
   433  
   434  	assert_eq(parseString(`
   435  		let x = 100; #comment
   436  		let y = 200; #comment!!!!!
   437  			x+y
   438  	`),
   439  		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
   440  	);
   441  
   442  	assert_eq(parseString(`
   443  		var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
   444  		fac(10)
   445  	`),
   446  		let("fac", "", fun(["x"],
   447  			call(var("if"),
   448  				call(var("<="), var("x"), intl(1)),
   449  				fun([], intl(1)),
   450  				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
   451  			)),
   452  			call(var("fac"),intl(10))
   453  		)
   454  	);
   455  }
   456  
   457  unittest
   458  {
   459  	assert_throw!UnexpectedEOF(parseString(`1+`));
   460  	assert_throw!ParseException(parseString(`1+2}`));
   461  	assert_throw!UnexpectedEOF(parseString(`let "x"`));
   462  	assert_throw!UnexpectedEOF(parseString(`var`));
   463  	assert_throw!ParseException(parseString(`@val x ==`));
   464  	assert_throw!ParseException(parseString(`if(){1}`));
   465  	assert_throw!UnexpectedEOF(parseString(`f(`));
   466  }
   467  
   468  unittest
   469  {
   470  	mixin EasyAST;
   471  	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
   472  		let("foo", "",
   473  			fun(["x"], call(var("+"), var("x"), intl(1))),
   474  			var("foo"))
   475  	);
   476  
   477  	assert_eq(parseString(`@@type ( x ) { x }`),
   478  		let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
   479  
   480  	assert_eq(parseString(`{}`), call(var("{}")));
   481  	assert_eq(parseString(`{foo:1,"bar":2}`),
   482  		call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
   483  	assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
   484  	assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
   485  	assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
   486  }