Artifact Content
Not logged in

Artifact f6a85fc83dd90cb0af328ba2f993bbc4b6407717


/**
 * Authors: k.inaba
 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
 *
 * Parser for Polemy programming language
 */
module polemy.parse;
import polemy._common;
import polemy.failure;
import polemy.lex;
import polemy.ast;
import polemy.layer;
import polemy.fresh;

/// Parse a string and return its AST

AST parseString(S, T...)(S str, T fn_ln_cn)
{
	return parserFromString(str, fn_ln_cn).parse();
}

/// Parse the content of a file and return its AST

AST parseFile(S, T...)(S filename, T ln_cn)
{
	return parserFromFile(filename, ln_cn).parse();
}

// Named Constructors of Parser

private auto parserFromLexer(Lexer)(Lexer lex)
	{ return new Parser!Lexer(lex); }

private auto parserFromString(T...)(T params)
	{ return parserFromLexer(lexerFromString(params)); }

private auto parserFromFile(T...)(T params)
	{ return parserFromLexer(lexerFromFile(params)); }

// Parser

private class Parser(Lexer)
	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
{
	AST parse()
	{
		auto e = Body();
		if( !lex.empty )
			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
		return e;
	}

	AST Body()
	{
		/// Body ::= Declaration
		///        | TopLevelExpression

		if( closingBracket() )
			return doNothingExpression();

		auto saved = lex.save;
		if( auto e = Declaration() )
			return e;
		lex = saved;
		return TopLevelExpression();
	}

	AST Declaration() // returns null if it is not a declaration
	{
		/// Declaration ::=
		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
		///  | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
		///  | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?

		auto pos = currentPosition();
		Layer layer = "";
		bool layerLiftDecl = false;

		if( tryEat("@") )
		{
			layer = "@" ~ eatId("after @", AllowQuoted);
			if( layer == "@@" )
			{
				layer = "@" ~ eatId("after @@", AllowQuoted);
				layerLiftDecl = true;
			}
			else
			{
				if( tryEat("(") )
					return null; // @lay(...) expression, not a declaration
			}
		}

		// [TODO] Refactor
		if( layerLiftDecl )
		{
			string kwd = "@" ~ layer;
			string var = layer;

			auto e = tryEat("(")
				? parseLambdaAfterOpenParen(pos)  // let var ( ...
				: (eat("=", "after "~kwd), E(0)); // let var = ...
			if( moreDeclarationExists() )
				return new Let(pos, var, SystemLayer, e, Body());
			else
				return new Let(pos, var, SystemLayer, e,
					new Lay(pos, SystemLayer, new Var(pos, var))
				);
		}
		else
		{
			string kwd = layer;
			if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
				return null; // none of {@lay, let, var, def} occurred, it's not a declaration

			auto varpos = currentPosition();
			string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable

			auto e = tryEat("(")
				? parseLambdaAfterOpenParen(pos)  // let var ( ...
				: (eat("=", "after "~kwd), E(0)); // let var = ...
			if( moreDeclarationExists() )
				return new Let(pos, var, layer, e, Body());
			else
				return new Let(pos, var, layer, e, new Var(varpos, var));
		}
	}

	AST TopLevelExpression()
	{
		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?

		auto pos = currentPosition();
		auto e = E(0);
		if( moreDeclarationExists() )
			return new Let(pos, "_", "", e, Body());
		else
			return e;
	}

	private bool moreDeclarationExists()
	{
		return (tryEat(";") || tryEat("in")) && !closingBracket();
	}

	private bool closingBracket()
	{
		return lex.empty || !lex.front.quoted && ["}",")","]",","].canFind(lex.front.str);
	}

	// [TODO] make this customizable from program
	private static string[][] operator_perferences = [
		["||"],
		["&&"],
		["!="],
		["=="],
		["<","<=",">",">="],
		["|"],
		["^"],
		["&"],
		["<<", ">>", "<<<", ">>>"],
		["+","-"],
		["~"],
		["*","/","%"],
		["^^","**"],
		[".",".?"]
	];

	AST E(size_t level)
	{
		/// Expression ::= (Binary left-associative operators over) Funcall

		AST rec(AST lhs)
		{
			if( closingBracket() )
				return lhs;

			auto pos = currentPosition();
			foreach(op; operator_perferences[level])
				if( tryEat(op) )
					if( op[0]=='.' )
						return rec(
							new App(lhs.pos, new Var(pos, op), lhs, parseId()));
					else
						return rec(
							new App(lhs.pos, new Var(pos, op), lhs, E(level+1)));
			return lhs;
		}

		if( operator_perferences.length <= level )
			return Funcall();
		else
			return rec(E(level+1));
	}

	AST Funcall()
	{
		/// Funcall ::= BaseExpression ["(" Expression%"," ")" | "{" ENTRIES "}"]*

		auto e = BaseExpression();
		for(;;)
			if( tryEat("(") )
			{
				auto pos = currentPosition();
				AST[] args;
				while( !tryEat(")") ) {
					if( lex.empty )
						throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
					args ~= E(0);
					if( !tryEat(",") ) {
						eat(")", "after function parameters");
						break;
					}
				}
				e = new App(e.pos, e, args);
			}
			else if( tryEat("{") )
			{
				e = parseTableSetAfterBrace(e);
			}
			else
				break;
		return e;
	}

	AST parseTableSetAfterBrace(AST e)
	{
		/// TableSet ::= "{" (ID ":" E) % "," "}"
		
		if( tryEat("}") )
			return e;
		auto pos = currentPosition();
		for(;;)
		{
			string key = eatId("for table key", AllowQuoted);
			eat(":", "after table key");
			AST val = E(0);
			e = new App(pos, new Var(pos,".="),
					e, new Str(pos,key), val);
			if( !tryEat(",") )
			{
				eat("}", "for the end of table literal");
				break;
			}
		}
		return e;
	}

	AST BaseExpression()
	{
		if( lex.empty )
			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");

		auto pos = lex.front.pos;
		if( lex.front.quoted )
		{
			scope(exit) lex.popFront;
			return new Str(pos, lex.front.str);
		}
		if( isNumber(lex.front.str) )
		{
			scope(exit) lex.popFront;
			return new Int(pos, BigInt(cast(string)lex.front.str));
		}
		if( tryEat("@") )
		{
			auto lay = "@"~eatId("for layer ID");
			eat("(", "for layered execution");
			auto e = Body();
			eat(")", "after "~lay~"(...");
			return new Lay(pos, lay, e);
		}
		if( tryEat("(") )
		{
			auto e = Body();
			eat(")", "after parenthesized expression");
			return e;
		}
		if( tryEat("{") )
		{
			AST e = new App(pos, new Var(pos,"{}"));
			return parseTableSetAfterBrace(e);
		}
		if( tryEat("if") )
		{
			return parseIfAfterIf(pos);
		}
		if( tryEat("case") )
		{
			return parsePatternMatch(pos);
		}
		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
		{
			eat("(", "after fun");
			return parseLambdaAfterOpenParen(pos);
		}
		scope(exit) lex.popFront;
		return new Var(pos, lex.front.str);
	}

	AST parseIfAfterIf(LexPosition pos)
	{
		auto cond = E(0);
		auto thenPos = currentPosition();
		if(!tryEat(":")) {
			eat("then", "after if condition");
			tryEat(":");
		}
		AST th = E(0);
		auto el = doNothingExpression();
		auto elsePos = currentPosition();
		if( tryEat("else") ) {
			tryEat(":");
			el = E(0);
		}
		return new App(pos, new Var(pos,"if"), cond, new Fun(thenPos,[],th), new Fun(elsePos,[],el));
	}

	AST parsePatternMatch(LexPosition pos)
	{
		//   case pmExpr CASES
		//==>
		//   let pmVar = pmExpr in (... let pmTryFirst = ... in pmTryFirst())
		AST   pmExpr = E(0);
		string pmVar = freshVarName();
		string pmTryFirst = freshVarName();
		AST   pmBody = parsePatternMatchCases(pmVar, pmTryFirst,
			new App(pos, new Var(pos, pmTryFirst)));
		return new Let(pos, pmVar, [], pmExpr, pmBody);
	}

	AST parsePatternMatchCases(string pmVar, string tryThisBranchVar, AST thenDoThis)
	{
		//    when pat: cBody
		//==>
		//    ... let failBranchVar = ... in
		//    let tryThisBranchVar = fun(){ if(test){cBody}else{failBranchVar()} } in thenDoThis
		if( tryEat("when") )
		{
			auto pos = currentPosition();
			string failBranchVar = freshVarName();

			auto pr = parsePattern();
			eat(":", "after when pattern");
			AST cBody = E(0);
			AST judgement = new App(pos, new Var(pos, "if"),
				ppTest(pmVar, pr), new Fun(pos,[],ppBind(pmVar, pr, cBody)),
				new Var(pos, failBranchVar));
			return parsePatternMatchCases(pmVar, failBranchVar, 
				new Let(pos, tryThisBranchVar, [],
					new Fun(pos,[],judgement), thenDoThis)
			);
		}
		else
		{
			auto pos = currentPosition();
			AST doNothing = new Fun(pos,[],
				new Str(pos, sprintf!"(pattern match failure:%s)"(pos)));
			return new Let(currentPosition(), tryThisBranchVar, [], doNothing, thenDoThis);
		}
	}

// hageshiku tenuki
	abstract class SinglePattern
	{
		string[] path;
		mixin SimpleClass;
		private AST access(string pmVar, string[] path) {
			auto pos = currentPosition();
			AST e = new Var(pos, pmVar);
			foreach(p; path)
				e = new App(pos, new Var(pos, "."), e, new Str(pos, p));
			return e;
		}
		private AST has(AST e, string k) {
			auto pos = currentPosition();
			return opAndAnd(
				new App(pos, new Var(pos, "_istable"), e),
				new App(pos, new Var(pos, ".?"), e, new Str(pos, k))
			);
		}
		private AST opAndAnd(AST a, AST b) {
			if( a is null ) return b;
			if( b is null ) return a;
			auto pos = currentPosition();
			return new App(pos,
				new Var(pos, "if"),
				a,
				new Fun(pos, [], b),
				new Fun(pos, [], new Int(pos, 0))
			);
		}
		AST ppTest(string pmVar) {
			AST c = null;
			for(int i=0; i<path.length; ++i)
				c = opAndAnd(c, has(access(pmVar,path[0..i]), path[i]));
			return c;
		}
		AST ppBind(string pmVar, AST thenDoThis) { return thenDoThis; }
	}
	class WildPattern : SinglePattern
	{
		mixin SimpleClass;
	}
	class VarPattern : SinglePattern
	{
		string name;
		mixin SimpleClass;
		AST ppBind(string pmVar, AST thenDoThis) {
			auto pos = currentPosition();
			return new Let(pos, name, [], access(pmVar,path), thenDoThis);
		}
	}
	class ConstantPattern : SinglePattern
	{
		AST e;
		mixin SimpleClass;
		AST ppTest(string pmVar) {
			auto pos = currentPosition();
			return opAndAnd( super.ppTest(pmVar),
				new App(pos, new Var(pos,"=="), access(pmVar,path), e)
			);
		}
	}

	SinglePattern[] parsePattern(string[] path = null)
	{
		SinglePattern[] result;
		if( tryEat("{") )
		{
			if( !tryEat("}") ) {
				do {
					string key = eatId("in table pattern", AllowQuoted);
					eat(":", "after field-id in table pattern");
					result ~= parsePattern(path ~ key);
				} while( tryEat(",") );
				eat("}", "at the end of table pattern");
			}
		}
		else
		{
			AST e = E(0);
			if(auto ev = cast(Var)e)
				if(ev.name == "_")
					result ~= new WildPattern(path);
				else
					result ~= new VarPattern(path, ev.name);
			else
				result ~= new ConstantPattern(path, e);
		}
		return result;
	}

	AST ppTest(string pmVar, SinglePattern[] pats)
	{
		auto pos = currentPosition();
		AST cond = null;
		foreach(p; pats) {
			AST c2 = p.ppTest(pmVar);
			if( c2 !is null )
				cond = cond is null ? c2
				    : new App(pos, new Var(pos,"&&"), cond, c2);
		}
		return cond is null ? new Int(currentPosition(), 1) : cond;
	}

	AST ppBind(string pmVar, SinglePattern[] pats, AST thenDoThis)
	{
		foreach(p; pats)
			thenDoThis = p.ppBind(pmVar, thenDoThis);
		return thenDoThis;
	}

	AST parseId()
	{
		scope(exit) lex.popFront;
		return new Str(currentPosition(), lex.front.str);
	}

	AST parseLambdaAfterOpenParen(LexPosition pos)
	{
		Parameter[] params;
		while( !tryEat(")") )
		{
			params ~= parseParam();
			if( !tryEat(",") ) {
				eat(")", "after function parameters");
				break;
			}
		}
		eat("{", "after function parameters");
		auto funbody = Body();
		eat("}", "after function body");
		return new Fun(pos, params, funbody);
	}

	Parameter parseParam()
	{
		string var;
		string[] lay;
		while( !closingBracket() && !lex.empty && lex.front.str!="," )
		{
			auto pos = currentPosition();
			string p = eatId("for function parameter", AllowQuoted);
			if( p == "@" )
				lay ~= "@" ~ eatId("after @", AllowQuoted);
			else if( var.empty )
				var = p;
			else
				throw genex!ParseException(pos, "one parameter has two names");
		}
		return new Parameter(var, lay);
	}

private:
	Lexer lex;
	this(Lexer lex) { this.lex = lex; }

	bool isNumber(string s)
	{
		return find!(`a<'0' || '9'<a`)(s).empty;
	}
	
	void eat(string kwd, lazy string msg)
	{
		if( !tryEat(kwd) )
			if( lex.empty )
				throw genex!UnexpectedEOF(
					currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
			else
				throw genex!ParseException(
					currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
	}

	bool tryEat(string kwd)
	{
		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
			return false;
		lex.popFront;
		return true;
	}

	enum {AllowQuoted=true, DisallowQuoted=false};
	string eatId(lazy string msg, bool aq=DisallowQuoted)
	{
		if( lex.empty )
			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
		if( !aq && lex.front.quoted )
			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
		scope(exit) lex.popFront;
		return lex.front.str;
	}

	AST doNothingExpression()
	{
		return new Str(currentPosition(), "(empty function body)");
	}

	LexPosition currentPosition()
	{
		return lex.empty ? null : lex.front.pos;
	}
}

unittest
{
	mixin EasyAST;

	assert_eq(parseString(`123`), intl(123));
	assert_eq(parseString(`"foo"`), strl("foo"));
	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
	assert_eq(parseString(`if 1 then 2`), call(var("if"),intl(1),fun([],intl(2)),fun([],strl("(empty function body)"))));
	assert_eq(parseString(`if 1 then: 2 else(3)`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
	assert_eq(parseString(`(if 1 then () else 3)()()`),
		call(call(call(var("if"),intl(1),fun([],strl("(empty function body)")),fun([],intl(3))))));
	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], strl("(empty function body)")));

	assert_eq(parseString(`
		let x = 100; #comment
		let y = 200; #comment!!!!!
			x+y
	`),
		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
	);

	assert_eq(parseString(`
		var fac = fun(x){ if(x <= 1) then 1 else x*fac(x-1) };
		fac(10)
	`),
		let("fac", "", fun(["x"],
			call(var("if"),
				call(var("<="), var("x"), intl(1)),
				fun([], intl(1)),
				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
			)),
			call(var("fac"),intl(10))
		)
	);
}

unittest
{
	assert_throw!UnexpectedEOF(parseString(`1+`));
	assert_throw!ParseException(parseString(`1+2}`));
	assert_throw!UnexpectedEOF(parseString(`let "x"`));
	assert_throw!UnexpectedEOF(parseString(`var`));
	assert_throw!ParseException(parseString(`@val x ==`));
	assert_throw!ParseException(parseString(`if(){1}`));
	assert_throw!UnexpectedEOF(parseString(`f(`));
}

unittest
{
	mixin EasyAST;
	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
		let("foo", "",
			fun(["x"], call(var("+"), var("x"), intl(1))),
			var("foo"))
	);

	assert_eq(parseString(`@@type ( x ) { x }`),
		let("@type", SystemLayer, fun(["x"], var("x")), lay(SystemLayer, var("@type"))) );

	assert_eq(parseString(`{}`), call(var("{}")));
	assert_eq(parseString(`{foo:1,"bar":2}`),
		call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
	assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
	assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
	assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
}

unittest
{
	assert_nothrow(parseString(`
		case( 1 )
			when(x): 1
	`));
	assert_nothrow(parseString(`
		case 1
			when {aaaa:_}: 1
	`));
	assert_nothrow(parseString(`
		case 1
			when {aaaa:@value(x)}: 1
			when {aaaa:{bbb:_}, ccc:123}: 1
	`));
}