Artifact Content
Not logged in

Artifact 8212de2433d6e8184b4f1795239f72ee26ac023d


     1  /**
     2   * Authors: k.inaba
     3   * License: NYSL 0.9982 http://www.kmonos.net/nysl/
     4   *
     5   * Parser for Polemy programming language
     6   */
     7  module polemy.parse;
     8  import polemy._common;
     9  import polemy.lex;
    10  import polemy.ast;
    11  
    12  /// Thrown when encountered a syntax error
    13  
    14  class ParseException : Exception
    15  {
    16  	mixin ExceptionWithPosition;
    17  }
    18  
    19  /// Parse a string and return its AST
    20  /// Throws: ParseException, LexException, UnexpectedEOF
    21  
    22  AST parseString(S, T...)(S str, T fn_ln_cn)
    23  {
    24  	return parserFromString(str, fn_ln_cn).parse();
    25  }
    26  
    27  /// Parse the content of a file and return its AST
    28  /// Throws: ParseException, LexException, UnexpectedEOF
    29  
    30  AST parseFile(S, T...)(S filename, T ln_cn)
    31  {
    32  	return parserFromFile(filename, ln_cn).parse();
    33  }
    34  
    35  // Named Constructors of Parser
    36  
    37  private auto parserFromLexer(Lexer)(Lexer lex)
    38  	{ return new Parser!Lexer(lex); }
    39  
    40  private auto parserFromString(T...)(T params)
    41  	{ return parserFromLexer(lexerFromString(params)); }
    42  
    43  private auto parserFromFile(T...)(T params)
    44  	{ return parserFromLexer(lexerFromFile(params)); }
    45  
    46  // Parser
    47  
    48  private class Parser(Lexer)
    49  	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
    50  {
    51  	AST parse()
    52  	{
    53  		auto e = Body();
    54  		if( !lex.empty )
    55  			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
    56  		return e;
    57  	}
    58  
    59  	AST Body()
    60  	{
    61  		/// Body ::= Declaration
    62  		///        | TopLevelExpression
    63  
    64  		if( closingBracket() )
    65  			return doNothingExpression();
    66  
    67  		auto saved = lex.save;
    68  		if( auto e = Declaration() )
    69  			return e;
    70  		lex = saved;
    71  		return TopLevelExpression();
    72  	}
    73  
    74  	AST Declaration() // returns null if it is not a declaration
    75  	{
    76  		/// Declaration ::=
    77  		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
    78  		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
    79  
    80  		auto pos = currentPosition();
    81  		string layer = "";
    82  
    83  		if( tryEat("@") )
    84  		{
    85  			layer = "@" ~ eatId("after @", AllowQuoted);
    86  			if( tryEat("(") )
    87  				return null; // @lay(...) expression, not a declaration
    88  		}
    89  
    90  		string kwd = layer;
    91  		if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
    92  			return null; // none of {@lay, let, var, def} occurred, it's not a declaration
    93  
    94  		auto varpos = currentPosition();
    95  		string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
    96  
    97  		auto e = tryEat("(")
    98  			? parseLambdaAfterOpenParen(varpos)  // let var ( ...
    99  			: (eat("=", "after "~kwd), E(0));    // let var = ...
   100  
   101  		if( moreDeclarationExists() )
   102  			return new LetExpression(pos, var, layer, e, Body());
   103  		else
   104  			return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
   105  	}
   106  
   107  	AST TopLevelExpression()
   108  	{
   109  		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
   110  
   111  		auto pos = currentPosition();
   112  		auto e = E(0);
   113  		if( moreDeclarationExists() )
   114  			return new LetExpression(pos, "_", "", e, Body());
   115  		else
   116  			return e;
   117  	}
   118  
   119  	private bool moreDeclarationExists()
   120  	{
   121  		return (tryEat(";") || tryEat("in")) && !closingBracket();
   122  	}
   123  
   124  	private bool closingBracket()
   125  	{
   126  		return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
   127  	}
   128  
   129  	// [TODO] make this customizable from program
   130  	private static string[][] operator_perferences = [
   131  		["||"],
   132  		["&&"],
   133  		["!="],
   134  		["=="],
   135  		["<","<=",">",">="],
   136  		["|"],
   137  		["^"],
   138  		["&"],
   139  		["<<", ">>"],
   140  		["+","-"],
   141  		["~"],
   142  		["*","/","%"],
   143  		["^^","**"]
   144  	];
   145  
   146  	AST E(size_t level)
   147  	{
   148  		/// Expression ::= (Binary left-associative operators over) Funcall
   149  
   150  		AST rec(AST lhs)
   151  		{
   152  			if( closingBracket() )
   153  				return lhs;
   154  
   155  			auto pos = currentPosition();
   156  			foreach(op; operator_perferences[level])
   157  				if( tryEat(op) )
   158  					return rec(
   159  						new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
   160  			return lhs;
   161  		}
   162  
   163  		if( operator_perferences.length <= level )
   164  			return Funcall();
   165  		else
   166  			return rec(E(level+1));
   167  	}
   168  
   169  	AST Funcall()
   170  	{
   171  		/// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
   172  
   173  		auto e = BaseExpression();
   174  		while( tryEat("(") )
   175  		{
   176  			auto pos = currentPosition();
   177  			AST[] args;
   178  			while( !tryEat(")") ) {
   179  				if( lex.empty )
   180  					throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
   181  				args ~= E(0);
   182  				if( !tryEat(",") ) {
   183  					eat(")", "after function parameters");
   184  					break;
   185  				}
   186  			}
   187  			e = new FuncallExpression(e.pos, e, args);
   188  		}
   189  		return e;
   190  	}
   191  
   192  	AST BaseExpression()
   193  	{
   194  		if( lex.empty )
   195  			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
   196  
   197  		auto pos = lex.front.pos;
   198  		if( lex.front.quoted )
   199  		{
   200  			scope(exit) lex.popFront;
   201  			return new StrLiteral(pos, lex.front.str);
   202  		}
   203  		if( isNumber(lex.front.str) )
   204  		{
   205  			scope(exit) lex.popFront;
   206  			return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
   207  		}
   208  		if( tryEat("@") )
   209  		{
   210  			auto lay = "@"~eatId("for layer ID");
   211  			eat("(", "for layered execution");
   212  			auto e = Body();
   213  			eat(")", "after "~lay~"(...");
   214  			return new LayeredExpression(pos, lay, e);
   215  		}
   216  		if( tryEat("(") )
   217  		{
   218  			auto e = Body();
   219  			eat(")", "after parenthesized expression");
   220  			return e;
   221  		}
   222  		if( tryEat("if") )
   223  		{
   224  			eat("(", "after if");
   225  			auto cond = E(0);
   226  			eat(")", "after if condition");
   227  			auto thenPos = lex.front.pos;
   228  			eat("{", "after if condition");
   229  			auto th = Body();
   230  			eat("}", "after if-then body");
   231  			auto el = doNothingExpression();
   232  			auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
   233  			if( tryEat("else") ) {
   234  				eat("{", "after else");
   235  				el = Body();
   236  				eat("}", "after else body");
   237  			}
   238  			return new FuncallExpression(pos, 
   239  				new VarExpression(pos, "if"),
   240  				cond,
   241  				new FunLiteral(thenPos, [], th),
   242  				new FunLiteral(elsePos, [], el)
   243  			);
   244  		}
   245  		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
   246  		{
   247  			eat("(", "after fun");
   248  			return parseLambdaAfterOpenParen(pos);
   249  		}
   250  		scope(exit) lex.popFront;
   251  		return new VarExpression(pos, lex.front.str);
   252  	}
   253  
   254  	AST parseLambdaAfterOpenParen(immutable LexPosition pos)
   255  	{
   256  		Parameter[] params;
   257  		while( !tryEat(")") )
   258  		{
   259  			params ~= parseParam();
   260  			if( !tryEat(",") ) {
   261  				eat(")", "after function parameters");
   262  				break;
   263  			}
   264  		}
   265  		eat("{", "after function parameters");
   266  		auto funbody = Body();
   267  		eat("}", "after function body");
   268  		return new FunLiteral(pos, params, funbody);
   269  	}
   270  
   271  	Parameter parseParam()
   272  	{
   273  		string var;
   274  		string[] lay;
   275  		while( !closingBracket() && !lex.empty && lex.front.str!="," )
   276  		{
   277  			auto pos = currentPosition();
   278  			string p = eatId("for function parameter", AllowQuoted);
   279  			if( p == "@" )
   280  				lay ~= "@" ~ eatId("after @", AllowQuoted);
   281  			else if( var.empty )
   282  				var = p;
   283  			else
   284  				throw genex!ParseException(pos, "one parameter has two names");
   285  		}
   286  		return new Parameter(var, lay);
   287  	}
   288  
   289  private:
   290  	Lexer lex;
   291  	this(Lexer lex) { this.lex = lex; }
   292  
   293  	bool isNumber(string s)
   294  	{
   295  		return find!(`a<'0' || '9'<a`)(s).empty;
   296  	}
   297  	
   298  	void eat(string kwd, lazy string msg)
   299  	{
   300  		if( !tryEat(kwd) )
   301  			if( lex.empty )
   302  				throw genex!UnexpectedEOF(
   303  					currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
   304  			else
   305  				throw genex!ParseException(
   306  					currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
   307  	}
   308  
   309  	bool tryEat(string kwd)
   310  	{
   311  		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
   312  			return false;
   313  		lex.popFront;
   314  		return true;
   315  	}
   316  
   317  	enum {AllowQuoted=true, DisallowQuoted=false};
   318  	string eatId(lazy string msg, bool aq=DisallowQuoted)
   319  	{
   320  		if( lex.empty )
   321  			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
   322  		if( !aq && lex.front.quoted )
   323  			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
   324  		scope(exit) lex.popFront;
   325  		return lex.front.str;
   326  	}
   327  
   328  	AST doNothingExpression()
   329  	{
   330  		return new IntLiteral(currentPosition(), BigInt(178));
   331  	}
   332  
   333  	immutable(LexPosition) currentPosition()
   334  	{
   335  		return lex.empty ? null : lex.front.pos;
   336  	}
   337  }
   338  
   339  unittest
   340  {
   341  	mixin EasyAST;
   342  
   343  	assert_eq(parseString(`123`), intl(123));
   344  	assert_eq(parseString(`"foo"`), strl("foo"));
   345  	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
   346  	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
   347  	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
   348  	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
   349  	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
   350  	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
   351  	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
   352  	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
   353  	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
   354  	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
   355  	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
   356  	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
   357  	assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
   358  	assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
   359  	assert_eq(parseString(`if(1){}else{3}()()`),
   360  		call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
   361  	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
   362  	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
   363  	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
   364  	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
   365  	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
   366  	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
   367  		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
   368  
   369  	assert_eq(parseString(`
   370  		let x = 100; #comment
   371  		let y = 200; #comment!!!!!
   372  			x+y
   373  	`),
   374  		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
   375  	);
   376  
   377  	assert_eq(parseString(`
   378  		var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
   379  		fac(10)
   380  	`),
   381  		let("fac", "", fun(["x"],
   382  			call(var("if"),
   383  				call(var("<="), var("x"), intl(1)),
   384  				fun([], intl(1)),
   385  				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
   386  			)),
   387  			call(var("fac"),intl(10))
   388  		)
   389  	);
   390  }
   391  
   392  unittest
   393  {
   394  	assert_throw!UnexpectedEOF(parseString(`1+`));
   395  	assert_throw!ParseException(parseString(`1+2}`));
   396  	assert_throw!UnexpectedEOF(parseString(`let "x"`));
   397  	assert_throw!UnexpectedEOF(parseString(`var`));
   398  	assert_throw!ParseException(parseString(`@val x ==`));
   399  	assert_throw!ParseException(parseString(`if(){1}`));
   400  	assert_throw!UnexpectedEOF(parseString(`f(`));
   401  }
   402  
   403  unittest
   404  {
   405  	mixin EasyAST;
   406  	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
   407  		let("foo", "",
   408  			fun(["x"], call(var("+"), var("x"), intl(1))),
   409  			var("foo"))
   410  	);
   411  }