Artifact Content
Not logged in

Artifact 7ed0053850ffa0322a68b4e8632b9e8222715f8f


     1  /**
     2   * Authors: k.inaba
     3   * License: NYSL 0.9982 http://www.kmonos.net/nysl/
     4   *
     5   * Parser for Polemy programming language
     6   */
     7  module polemy.parse;
     8  import polemy._common;
     9  import polemy.lex;
    10  import polemy.ast;
    11  
    12  /// Thrown when encountered a syntax error
    13  
    14  class ParseException : Exception
    15  {
    16  	mixin ExceptionWithPosition;
    17  }
    18  
    19  /// Parse a string and return its AST
    20  /// Throws: ParseException, LexException, UnexpectedEOF
    21  
    22  AST parseString(S, T...)(S str, T fn_ln_cn)
    23  {
    24  	return parserFromString(str, fn_ln_cn).parse();
    25  }
    26  
    27  /// Parse the content of a file and return its AST
    28  /// Throws: ParseException, LexException, UnexpectedEOF
    29  
    30  AST parseFile(S, T...)(S filename, T ln_cn)
    31  {
    32  	return parserFromFile(filename, ln_cn).parse();
    33  }
    34  
    35  // Named Constructors of Parser
    36  
    37  private auto parserFromLexer(Lexer)(Lexer lex)
    38  	{ return new Parser!Lexer(lex); }
    39  
    40  private auto parserFromString(T...)(T params)
    41  	{ return parserFromLexer(lexerFromString(params)); }
    42  
    43  private auto parserFromFile(T...)(T params)
    44  	{ return parserFromLexer(lexerFromFile(params)); }
    45  
    46  // Parser
    47  
    48  private class Parser(Lexer)
    49  	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
    50  {
    51  	AST parse()
    52  	{
    53  		auto e = Body();
    54  		if( !lex.empty )
    55  			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
    56  		return e;
    57  	}
    58  
    59  	AST Body()
    60  	{
    61  		/// Body ::= Declaration
    62  		///        | TopLevelExpression
    63  
    64  		if( closingBracket() )
    65  			return doNothingExpression();
    66  
    67  		auto saved = lex.save;
    68  		if( auto e = Declaration() )
    69  			return e;
    70  		lex = saved;
    71  		return TopLevelExpression();
    72  	}
    73  
    74  	AST Declaration() // returns null if it is not a declaration
    75  	{
    76  		/// Declaration ::=
    77  		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
    78  		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    79  		///  | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
    80  		///  | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    81  
    82  		auto pos = currentPosition();
    83  		string layer = "";
    84  		bool layerRiseDecl = false;
    85  
    86  		if( tryEat("@") )
    87  		{
    88  			layer = "@" ~ eatId("after @", AllowQuoted);
    89  			if( layer == "@@" )
    90  			{
    91  				layer = "@" ~ eatId("after @@", AllowQuoted);
    92  				layerRiseDecl = true;
    93  			}
    94  			else
    95  			{
    96  				if( tryEat("(") )
    97  					return null; // @lay(...) expression, not a declaration
    98  			}
    99  		}
   100  
   101  		// [TODO] Refactor
   102  		if( layerRiseDecl )
   103  		{
   104  			string kwd = "@" ~ layer;
   105  			string var = layer;
   106  
   107  			auto e = tryEat("(")
   108  				? parseLambdaAfterOpenParen(pos)  // let var ( ...
   109  				: (eat("=", "after "~kwd), E(0)); // let var = ...
   110  			if( moreDeclarationExists() )
   111  				return new LetExpression(pos, var, "(system)", e, Body());
   112  			else
   113  				return new LetExpression(pos, var, "(system)", e, new VarExpression(pos, var));
   114  		}
   115  		else
   116  		{
   117  			string kwd = layer;
   118  			if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
   119  				return null; // none of {@lay, let, var, def} occurred, it's not a declaration
   120  
   121  			auto varpos = currentPosition();
   122  			string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
   123  
   124  			auto e = tryEat("(")
   125  				? parseLambdaAfterOpenParen(varpos)  // let var ( ...
   126  				: (eat("=", "after "~kwd), E(0));    // let var = ...
   127  			if( moreDeclarationExists() )
   128  				return new LetExpression(pos, var, layer, e, Body());
   129  			else
   130  				return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
   131  		}
   132  	}
   133  
   134  	AST TopLevelExpression()
   135  	{
   136  		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
   137  
   138  		auto pos = currentPosition();
   139  		auto e = E(0);
   140  		if( moreDeclarationExists() )
   141  			return new LetExpression(pos, "_", "", e, Body());
   142  		else
   143  			return e;
   144  	}
   145  
   146  	private bool moreDeclarationExists()
   147  	{
   148  		return (tryEat(";") || tryEat("in")) && !closingBracket();
   149  	}
   150  
   151  	private bool closingBracket()
   152  	{
   153  		return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
   154  	}
   155  
   156  	// [TODO] make this customizable from program
   157  	private static string[][] operator_perferences = [
   158  		["||"],
   159  		["&&"],
   160  		["!="],
   161  		["=="],
   162  		["<","<=",">",">="],
   163  		["|"],
   164  		["^"],
   165  		["&"],
   166  		["<<", ">>"],
   167  		["+","-"],
   168  		["~"],
   169  		["*","/","%"],
   170  		["^^","**"]
   171  	];
   172  
   173  	AST E(size_t level)
   174  	{
   175  		/// Expression ::= (Binary left-associative operators over) Funcall
   176  
   177  		AST rec(AST lhs)
   178  		{
   179  			if( closingBracket() )
   180  				return lhs;
   181  
   182  			auto pos = currentPosition();
   183  			foreach(op; operator_perferences[level])
   184  				if( tryEat(op) )
   185  					return rec(
   186  						new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
   187  			return lhs;
   188  		}
   189  
   190  		if( operator_perferences.length <= level )
   191  			return Funcall();
   192  		else
   193  			return rec(E(level+1));
   194  	}
   195  
   196  	AST Funcall()
   197  	{
   198  		/// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
   199  
   200  		auto e = BaseExpression();
   201  		while( tryEat("(") )
   202  		{
   203  			auto pos = currentPosition();
   204  			AST[] args;
   205  			while( !tryEat(")") ) {
   206  				if( lex.empty )
   207  					throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
   208  				args ~= E(0);
   209  				if( !tryEat(",") ) {
   210  					eat(")", "after function parameters");
   211  					break;
   212  				}
   213  			}
   214  			e = new FuncallExpression(e.pos, e, args);
   215  		}
   216  		return e;
   217  	}
   218  
   219  	AST BaseExpression()
   220  	{
   221  		if( lex.empty )
   222  			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
   223  
   224  		auto pos = lex.front.pos;
   225  		if( lex.front.quoted )
   226  		{
   227  			scope(exit) lex.popFront;
   228  			return new StrLiteral(pos, lex.front.str);
   229  		}
   230  		if( isNumber(lex.front.str) )
   231  		{
   232  			scope(exit) lex.popFront;
   233  			return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
   234  		}
   235  		if( tryEat("@") )
   236  		{
   237  			auto lay = "@"~eatId("for layer ID");
   238  			eat("(", "for layered execution");
   239  			auto e = Body();
   240  			eat(")", "after "~lay~"(...");
   241  			return new LayeredExpression(pos, lay, e);
   242  		}
   243  		if( tryEat("(") )
   244  		{
   245  			auto e = Body();
   246  			eat(")", "after parenthesized expression");
   247  			return e;
   248  		}
   249  		if( tryEat("if") )
   250  		{
   251  			eat("(", "after if");
   252  			auto cond = E(0);
   253  			eat(")", "after if condition");
   254  			auto thenPos = lex.front.pos;
   255  			eat("{", "after if condition");
   256  			auto th = Body();
   257  			eat("}", "after if-then body");
   258  			auto el = doNothingExpression();
   259  			auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
   260  			if( tryEat("else") ) {
   261  				eat("{", "after else");
   262  				el = Body();
   263  				eat("}", "after else body");
   264  			}
   265  			return new FuncallExpression(pos, 
   266  				new VarExpression(pos, "if"),
   267  				cond,
   268  				new FunLiteral(thenPos, [], th),
   269  				new FunLiteral(elsePos, [], el)
   270  			);
   271  		}
   272  		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
   273  		{
   274  			eat("(", "after fun");
   275  			return parseLambdaAfterOpenParen(pos);
   276  		}
   277  		scope(exit) lex.popFront;
   278  		return new VarExpression(pos, lex.front.str);
   279  	}
   280  
   281  	AST parseLambdaAfterOpenParen(immutable LexPosition pos)
   282  	{
   283  		Parameter[] params;
   284  		while( !tryEat(")") )
   285  		{
   286  			params ~= parseParam();
   287  			if( !tryEat(",") ) {
   288  				eat(")", "after function parameters");
   289  				break;
   290  			}
   291  		}
   292  		eat("{", "after function parameters");
   293  		auto funbody = Body();
   294  		eat("}", "after function body");
   295  		return new FunLiteral(pos, params, funbody);
   296  	}
   297  
   298  	Parameter parseParam()
   299  	{
   300  		string var;
   301  		string[] lay;
   302  		while( !closingBracket() && !lex.empty && lex.front.str!="," )
   303  		{
   304  			auto pos = currentPosition();
   305  			string p = eatId("for function parameter", AllowQuoted);
   306  			if( p == "@" )
   307  				lay ~= "@" ~ eatId("after @", AllowQuoted);
   308  			else if( var.empty )
   309  				var = p;
   310  			else
   311  				throw genex!ParseException(pos, "one parameter has two names");
   312  		}
   313  		return new Parameter(var, lay);
   314  	}
   315  
   316  private:
   317  	Lexer lex;
   318  	this(Lexer lex) { this.lex = lex; }
   319  
   320  	bool isNumber(string s)
   321  	{
   322  		return find!(`a<'0' || '9'<a`)(s).empty;
   323  	}
   324  	
   325  	void eat(string kwd, lazy string msg)
   326  	{
   327  		if( !tryEat(kwd) )
   328  			if( lex.empty )
   329  				throw genex!UnexpectedEOF(
   330  					currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
   331  			else
   332  				throw genex!ParseException(
   333  					currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
   334  	}
   335  
   336  	bool tryEat(string kwd)
   337  	{
   338  		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
   339  			return false;
   340  		lex.popFront;
   341  		return true;
   342  	}
   343  
   344  	enum {AllowQuoted=true, DisallowQuoted=false};
   345  	string eatId(lazy string msg, bool aq=DisallowQuoted)
   346  	{
   347  		if( lex.empty )
   348  			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
   349  		if( !aq && lex.front.quoted )
   350  			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
   351  		scope(exit) lex.popFront;
   352  		return lex.front.str;
   353  	}
   354  
   355  	AST doNothingExpression()
   356  	{
   357  		return new IntLiteral(currentPosition(), BigInt(178));
   358  	}
   359  
   360  	immutable(LexPosition) currentPosition()
   361  	{
   362  		return lex.empty ? null : lex.front.pos;
   363  	}
   364  }
   365  
   366  unittest
   367  {
   368  	mixin EasyAST;
   369  
   370  	assert_eq(parseString(`123`), intl(123));
   371  	assert_eq(parseString(`"foo"`), strl("foo"));
   372  	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
   373  	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
   374  	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
   375  	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
   376  	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
   377  	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
   378  	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
   379  	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
   380  	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
   381  	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
   382  	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
   383  	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
   384  	assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
   385  	assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
   386  	assert_eq(parseString(`if(1){}else{3}()()`),
   387  		call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
   388  	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
   389  	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
   390  	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
   391  	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
   392  	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
   393  	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
   394  		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
   395  
   396  	assert_eq(parseString(`
   397  		let x = 100; #comment
   398  		let y = 200; #comment!!!!!
   399  			x+y
   400  	`),
   401  		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
   402  	);
   403  
   404  	assert_eq(parseString(`
   405  		var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
   406  		fac(10)
   407  	`),
   408  		let("fac", "", fun(["x"],
   409  			call(var("if"),
   410  				call(var("<="), var("x"), intl(1)),
   411  				fun([], intl(1)),
   412  				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
   413  			)),
   414  			call(var("fac"),intl(10))
   415  		)
   416  	);
   417  }
   418  
   419  unittest
   420  {
   421  	assert_throw!UnexpectedEOF(parseString(`1+`));
   422  	assert_throw!ParseException(parseString(`1+2}`));
   423  	assert_throw!UnexpectedEOF(parseString(`let "x"`));
   424  	assert_throw!UnexpectedEOF(parseString(`var`));
   425  	assert_throw!ParseException(parseString(`@val x ==`));
   426  	assert_throw!ParseException(parseString(`if(){1}`));
   427  	assert_throw!UnexpectedEOF(parseString(`f(`));
   428  }
   429  
   430  unittest
   431  {
   432  	mixin EasyAST;
   433  	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
   434  		let("foo", "",
   435  			fun(["x"], call(var("+"), var("x"), intl(1))),
   436  			var("foo"))
   437  	);
   438  
   439  	assert_eq(parseString(`@@type ( x ) { x }`),
   440  		let("@type", "(system)", fun(["x"], var("x")), var("@type")) );
   441  }