Check-in [5e407d7cf8]
Not logged in
Overview
SHA1 Hash:5e407d7cf8ae6c2e9072663f07573c64ae6df2e7
Date: 2010-11-08 20:42:14
User: kinaba
Comment:Lexer refactored so that it can accept multi-symbol tokens
Timelines: family | ancestors | descendants | both | trunk
Downloads: Tarball | ZIP archive
Other Links: files | file ages | manifest
Tags And Properties
Changes

Modified polemy/ast.d from [6aa982b32b1d6e05] to [79741a3186611288].

Modified polemy/eval.d from [baaa1d71de4dc9d8] to [003971cb94b55966].

173 173 ctxNeo.add(p, args[i]); 174 174 return eval(e.funbody, ctxNeo); 175 175 }); 176 176 } 177 177 throw new PolemyRuntimeException(sprintf!"Unknown Kind of Expression %s at [%s]"(typeid(_e), _e.pos)); 178 178 } 179 179 180 -/* 181 180 unittest 182 181 { 183 182 auto r = evalString(`var x = 21; x = x + x*x;`); 184 183 assert( r.val == new IntValue(BigInt(21+21*21)) ); 185 184 assert( r.ctx["x"] == new IntValue(BigInt(21+21*21)) ); 186 185 assert( !collectException(r.ctx["x"]) ); 187 186 assert( collectException(r.ctx["y"]) ); ................................................................................ 225 224 if(x<2) 226 225 { 1; } 227 226 else 228 227 { fib(x-1) + fib(x-2); }; 229 228 }; 230 229 print(fib(10));`); 231 230 } 232 -*/

Modified polemy/lex.d from [0972f7a454ea8e4f] to [5f52873e3ff7ae30].

2 2 * Authors: k.inaba 3 3 * License: NYSL 0.9982 http://www.kmonos.net/nysl/ 4 4 * 5 5 * Lexer for Polemy programming language. 6 6 */ 7 7 module polemy.lex; 8 8 import polemy._common; 9 +import std.file : readText; 10 +import std.ctype : isspace, isalnum; 9 11 10 -import std.file : readText; 11 -import std.string : munch; 12 -import std.ctype; 12 +/// Exception from this module 13 + 14 +class LexException : Exception 15 +{ 16 + this( const LexPosition pos, string msg ) 17 + { super(sprintf!"%s [%s]"(msg, pos)); this.pos = pos; } 18 + const LexPosition pos; 19 +}; 13 20 14 21 /// Represents a position in a source code 15 22 16 23 class LexPosition 17 24 { 18 25 immutable string filename; /// name of the source file 19 26 immutable int lineno; /// line number, 1, 2, ... ................................................................................ 74 81 assert( !__traits(compiles, t.pos=p) ); 75 82 assert( !__traits(compiles, t.str=789) ); 76 83 assert( !__traits(compiles, t.quoted=true) ); 77 84 } 78 85 79 86 /// Named Construtor for Lexer 80 87 81 -Lexer lexerFromFile(T...)( string filename, T rest ) 88 +auto lexerFromFile(T...)( string filename, T rest ) 82 89 { 83 90 return lexerFromString( std.file.readText(filename), filename, rest ); 84 91 } 85 92 86 93 /// Named Construtor for Lexer 87 94 88 -Lexer lexerFromString( string str, string filename="<unnamed>", int lineno=1, int column=1 ) 95 +auto lexerFromString(CharSeq)( CharSeq str, string filename="<unnamed>", int lineno=1, int column=1 ) 89 96 { 90 - return new Lexer(str, filename, lineno, column); 97 + return new LexerT!(PositionedReader!CharSeq)( 98 + PositionedReader!CharSeq(str, filename, lineno, column) 99 + ); 91 100 } 92 101 93 -/// Lexer is a forward range of Tokens 102 +/// Standard Lexer Type (all users have to know is that this is a forward range of Tokens) 94 103 95 -class Lexer 104 +alias LexerT!(PositionedReader!string) Lexer; 105 + 106 +/// Lexer Implementation 107 + 108 +class 
LexerT(Reader) 109 + if( isForwardRange!(Reader) && is(ElementType!(Reader) == dchar) ) 96 110 { 97 111 /// Range primitive 98 112 bool empty() /*@property*/ 99 113 { 100 114 return current is null; 101 115 } 102 116 ................................................................................ 110 124 void popFront() /*@property*/ 111 125 { 112 126 std.exception.enforce(current, "Lexer has already reached the end"); 113 127 current = readNext(); 114 128 } 115 129 116 130 /// Range primitive 117 - Lexer save() /*@property*/ 131 + typeof(this) save() /*@property*/ 118 132 { 119 - return new Lexer(this.tupleof); 133 + return new typeof(this)(reader.save, current); 120 134 } 121 135 122 136 private: // implementation 123 137 124 - string buffer; 125 - string filename; 126 - int lineno; 127 - int column; 138 + Reader reader; 128 139 Token current; 129 140 130 141 invariant() 131 142 { 132 - assert( buffer.empty || !std.ctype.isspace(buffer[0]) ); 143 + assert( reader.empty || !std.ctype.isspace(reader.front) ); 144 + } 145 + 146 + this( Reader reader, Token current = null ) 147 + { 148 + this.reader = reader; 149 + readWhile!isSpace(); 150 + this.current = (current is null ? 
readNext() : current); 151 + } 152 + 153 + public static { 154 + bool isSpace (dchar c) { return std.ctype.isspace(c)!=0; } 155 + bool isSymbol (dchar c) { return 0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_' && c!='\''; } 156 + bool isSSymbol (dchar c) { return !find("()[]{};", c).empty; } 157 + bool isMSymbol (dchar c) { return isSymbol(c) && !isSSymbol(c); } 158 + bool isLetter (dchar c) { return !isSpace(c) && !isSymbol(c); } 133 159 } 134 160 135 - this( string buffer, string filename, int lineno, int column, Token current=null ) 161 + string readQuoted(const LexPosition pos){char[] buf; return readQuoted(pos,buf);} 162 + string readQuoted(const LexPosition pos, ref char[] buf) 136 163 { 137 - this.buffer = buffer; 138 - this.filename = filename; 139 - this.lineno = lineno; 140 - this.column = column; 141 - skipws(); 142 - this.current = (current is null ? readNext() : current); 164 + if( reader.empty ) 165 + throw new LexException(pos, "EOF found while lexing a quoted-string"); 166 + dchar c = reader.front; 167 + reader.popFront; 168 + if( c == '"' ) 169 + return assumeUnique(buf); 170 + if( c == '\\' && !reader.empty ) { 171 + if( reader.front=='"' ) { 172 + reader.popFront; 173 + return readQuoted(pos,buf ~= '\"'); 174 + } 175 + if( reader.front=='\\' ) { 176 + reader.popFront; 177 + return readQuoted(pos,buf ~= '\\'); 178 + } 179 + } 180 + return readQuoted(pos,buf ~= c); 143 181 } 144 182 145 - void skipws() 183 + string readWhile(alias fn)() 146 184 { 147 - bool progress = false; 148 - do 149 - { 150 - string ws = buffer.munch(" \t"); 151 - column += ws.length; 152 - progress = !ws.empty; 153 - while( !buffer.empty && (buffer[0]=='\r' || buffer[0]=='\n') ) 154 - { 155 - progress = true; 156 - if( buffer[0] == '\n' ) 157 - buffer = buffer[1..$]; 158 - else // if( buffer.front == '\r' ) 159 - { 160 - buffer = buffer[1..$]; 161 - if( !buffer.empty && buffer[0]=='\n' ) 162 - buffer = buffer[1..$]; 163 - } 164 - lineno ++; 165 - column = 1; 166 - } 
167 - }while( progress ); 185 + char[] buf; 186 + for(; !reader.empty && fn(reader.front); reader.popFront) 187 + buf ~= reader.front; 188 + return assumeUnique(buf); 168 189 } 169 190 170 - char readChar() 171 - { 172 - scope(exit) { 173 - buffer = buffer[1..$]; 174 - column ++; 175 - } 176 - return buffer[0]; 177 - } 178 - 179 - /// This is the main lexing routine 180 191 Token readNext() 181 192 { 182 - if( buffer.empty ) 193 + if( reader.empty ) 183 194 return null; 184 - scope(exit) 185 - skipws(); 186 - 187 - if( isSymbol(buffer[0]) ) 195 + scope(success) 196 + readWhile!isSpace(); 197 + if( reader.front == '#' ) // comment 198 + { 199 + reader = find(reader, '\n'); 200 + readWhile!isSpace(); 201 + return readNext(); 202 + } 203 + else if( reader.front == '"' ) // quoted 204 + { 205 + auto pos = reader.currentPosition(); 206 + reader.popFront; 207 + return new Token(pos, readQuoted(pos), true); 208 + } 209 + else if( isSSymbol(reader.front) ) // paren 210 + { 211 + auto pos = reader.currentPosition(); 212 + string s; s~=reader.front; reader.popFront; 213 + return new Token(pos, s, false); 214 + } 215 + else if( isMSymbol(reader.front) ) // symbol 188 216 { 189 - if( buffer[0] == '#' ) 190 - { 191 - // skip comment 192 - while( !buffer.empty && (buffer[0]!='\n' && buffer[0]!='\r') ) 193 - readChar(); 194 - skipws(); 195 - return readNext(); 196 - } 197 - else if( buffer[0] == '"' ) 198 - { 199 - // string literal 200 - auto pos = currentPosition(); 201 - string lit; 202 - readChar(); 203 - while( !buffer.empty && buffer[0]!='"' ) 204 - { 205 - // read one char 206 - char c = readChar(); 207 - if( c == '\\' ) 208 - { 209 - if( !buffer.empty && (buffer[0]=='\\' || buffer[0]=='"') ) 210 - lit ~= readChar(); 211 - else 212 - lit ~= c; 213 - } 214 - else if( c == '\n' ) 215 - { 216 - lit ~= c; 217 - lineno++; 218 - column = 1; 219 - } 220 - else if( c == '\r' ) 221 - { 222 - if( !buffer.empty && buffer[0]=='\n' ) 223 - readChar(); 224 - lit ~= '\n'; 225 - lineno++; 
226 - column = 1; 227 - } 228 - else 229 - lit ~= c; 230 - } 231 - if( !buffer.empty ) 232 - readChar(); 233 - return new Token(pos, lit, true); 234 - } 235 - else 236 - { 237 - // normal symbol 238 - auto pos = currentPosition(); 239 - auto str = ""~readChar(); 240 - return new Token(pos, str, false); 241 - } 217 + auto pos = reader.currentPosition(); 218 + return new Token(pos, readWhile!isMSymbol(), false); 242 219 } 243 220 else 244 221 { 245 - auto pos = currentPosition(); 246 - int i = 0; 247 - while( i<buffer.length && !std.ctype.isspace(buffer[i]) && !isSymbol(buffer[i]) ) 248 - ++i; 249 - auto str = buffer[0 .. i]; 250 - buffer = buffer[i .. $]; 251 - column += i; 252 - return new Token(pos, str, false); 222 + auto pos = reader.currentPosition(); 223 + return new Token(pos, readWhile!isLetter(), false); 253 224 } 254 225 } 255 - 256 - bool isSymbol(char c) 257 - { 258 - return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_'); 259 - } 260 - 261 - immutable(LexPosition) currentPosition() 262 - { 263 - return new immutable(LexPosition)(filename, lineno, column); 264 - } 265 226 } 266 227 267 228 unittest 268 229 { 269 230 assert( std.range.isForwardRange!(Lexer) ); 270 231 } 271 232 272 233 unittest 273 234 { 274 - auto lex = lexerFromString("this is a \t\r\n pen :-( "); 235 + auto lex = lexerFromString("this is a \t\r\n pen :-( @@; "); 275 236 Token[] ts = std.array.array(lex); 276 237 277 238 assert_eq( ts[0].pos.lineno, 1 ); 278 239 assert_eq( ts[0].pos.column, 1 ); 279 240 assert( !ts[0].quoted ); 280 241 assert_eq( ts[0].str, "this" ); 281 242 ................................................................................ 290 251 assert_eq( ts[2].str, "a" ); 291 252 292 253 assert_eq( ts[3].pos.lineno, 2 ); 293 254 assert_eq( ts[3].pos.column, 2 ); 294 255 assert( !ts[3].quoted ); 295 256 assert_eq( ts[3].str, "pen" ); 296 257 297 - // consecutive symbols are always separated 298 - // hence, no "++" or "<<" or ... 
299 - 300 258 assert_eq( ts[4].pos.lineno, 2 ); 301 259 assert_eq( ts[4].pos.column, 6 ); 302 - assert_eq( ts[4].str, ":" ); 260 + assert_eq( ts[4].str, ":-" ); 303 261 304 262 assert_eq( ts[5].pos.lineno, 2 ); 305 - assert_eq( ts[5].pos.column, 7 ); 306 - assert_eq( ts[5].str, "-" ); 263 + assert_eq( ts[5].pos.column, 8 ); 264 + assert_eq( ts[5].str, "(" ); 265 + assert_eq( ts[6].str, "@@" ); 266 + assert_eq( ts[7].str, ";" ); // paren and simicolons are split 307 267 308 - assert_eq( ts[6].pos.lineno, 2 ); 309 - assert_eq( ts[6].pos.column, 8 ); 310 - assert_eq( ts[6].str, "(" ); 311 - 312 - assert_eq( ts.length, 7 ); 268 + assert_eq( ts.length, 8 ); 313 269 } 314 270 315 271 unittest 316 272 { 317 - auto lex2 = lexerFromString(" a12\n3a 5 "); 318 - assert_eq( lex2.front.str, "a12" ); 319 - lex2.popFront; 320 - auto lex3 = lex2.save; 321 - assert_eq( lex2.front.str, "3a" ); 322 - lex2.popFront; 323 - assert_eq( lex3.front.str, "3a" ); 324 - assert_eq( lex2.front.str, "5" ); 325 - lex2.popFront; 326 - lex3.popFront; 327 - assert( lex2.empty ); 328 - assert( !lex3.empty ); 329 - assert_eq( lex3.front.str, "5" ); 330 -} 331 - 332 -unittest 333 -{ 334 -//!! be sure to run the unittest on the root of the source directory 273 + // !! be sure to run the unittest on the root of the source directory 335 274 auto lexf = lexerFromFile("polemy/lex.d"); 336 275 lexf = find!`a.str == "module"`(lexf); 337 276 assert_eq( lexf.front.str, "module" ); 338 277 assert_eq( lexf.front.pos.filename, "polemy/lex.d" ); 339 278 assert_eq( lexf.front.pos.lineno, 7 ); 340 279 assert_eq( lexf.front.pos.column, 1 ); 341 280 lexf.popFront; 342 281 assert_eq( lexf.front.str, "polemy" ); 343 282 assert_eq( lexf.front.pos.lineno, 7 ); 344 283 assert_eq( lexf.front.pos.column, 8 ); 345 284 lexf.popFront; 346 - assert_eq( lexf.front.str, "." 
); 347 285 lexf.popFront; 348 - assert_eq( lexf.front.str, "lex" ); 349 286 lexf.popFront; 350 - assert_eq( lexf.front.str, ";" ); 351 287 lexf.popFront; 352 288 assert_eq( lexf.front.str, "import" ); 353 289 assert_eq( lexf.front.pos.lineno, 8 ); 354 290 assert_eq( lexf.front.pos.column, 1 ); 355 291 } 292 + 293 +unittest 294 +{ 295 + assert_throw!LexException( lexerFromString(`"`) ); 296 +} 356 297 357 298 unittest 358 299 { 359 300 auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!! 360 301 be ignored. 361 302 hahaha"hihihi""hu\\\"huhu"#123 aa 362 -123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee" 303 +123 aa "aaa`~"\n"~`bbb # 123`~"\r\n"~`eee" 363 304 zzz 364 305 `); 365 306 Token[] ts = std.array.array(lex); 366 307 assert_eq( ts[0].str, "my" ); 367 308 assert_eq( ts[0].pos.lineno, 1 ); 368 309 assert( !ts[0].quoted ); 369 310 assert_eq( ts[1].str, "be" ); ................................................................................ 388 329 assert_eq( ts[9].pos.lineno, 5 ); 389 330 assert_eq( ts[9].str, "aaa\nbbb # 123\neee" ); 390 331 assert( ts[9].quoted ); 391 332 assert_eq( ts[10].pos.lineno, 8 ); 392 333 assert( !ts[10].quoted ); 393 334 assert_eq( ts.length, 11 ); 394 335 } 336 + 337 +unittest 338 +{ 339 + auto lex2 = lexerFromString(" a12\n3a 5 "); 340 + assert_eq( lex2.front.str, "a12" ); 341 + lex2.popFront; 342 + auto lex3 = lex2.save; 343 + assert_eq( lex2.front.str, "3a" ); 344 + lex2.popFront; 345 + assert_eq( lex3.front.str, "3a" ); 346 + assert_eq( lex2.front.str, "5" ); 347 + lex2.popFront; 348 + lex3.popFront; 349 + assert( lex2.empty ); 350 + assert( !lex3.empty ); 351 + assert_eq( lex3.front.str, "5" ); 352 +} 353 + 354 +/// Forward range for reader character by character, 355 +/// keeping track of position information and caring \r\n -> \n conversion. 
356 + 357 +private 358 +struct PositionedReader(CharSeq) 359 + if( isForwardRange!(CharSeq) && is(ElementType!(CharSeq) == dchar) ) 360 +{ 361 + CharSeq buffer; 362 + string filename; 363 + int lineno; 364 + int column; 365 + 366 + /// Range primitive 367 + bool empty() /*@property*/ 368 + { 369 + return buffer.empty; 370 + } 371 + 372 + /// Range primitive 373 + dchar front() /*@property*/ 374 + { 375 + dchar c = buffer.front; 376 + return (c=='\r' ? '\n' : c); 377 + } 378 + 379 + /// Range primitive 380 + void popFront() /*@property*/ 381 + { 382 + dchar c = buffer.front; 383 + buffer.popFront; 384 + if( c=='\r' ) 385 + { 386 + if( !buffer.empty && buffer.front=='\n' ) 387 + buffer.popFront; 388 + c = '\n'; 389 + } 390 + if( c=='\n' ) 391 + { 392 + lineno ++; 393 + column = 1; 394 + } 395 + else 396 + column ++; 397 + } 398 + 399 + /// Range primitive 400 + typeof(this) save() /*@property*/ 401 + { 402 + return this; 403 + } 404 + 405 + /// Get the current position 406 + immutable(LexPosition) currentPosition() const 407 + { 408 + return new immutable(LexPosition)(filename, lineno, column); 409 + } 410 +} 411 + 412 +unittest 413 +{ 414 + assert( isForwardRange!(PositionedReader!string) ); 415 + assert( is(ElementType!(PositionedReader!string) == dchar) ); 416 +}

Modified polemy/parse.d from [752d9af2ecdd593b] to [de91c770214a43a1].

344 344 new FuncallExpression(null, 345 345 new FunLiteralExpression(null, ["abc"], [ 346 346 ]), 347 347 new IntLiteralExpression(null, BigInt(4)) 348 348 )))); 349 349 } 350 350 351 -/* 352 351 unittest 353 352 { 354 353 auto p = parserFromString(`var x = 1; var f = fun(){x=x+1;}; f(); f(); x;`); 355 354 Program prog = p.parseProgram(); 356 355 } 357 356 358 357 unittest ................................................................................ 363 362 new VarExpression(null, "if"), 364 363 new FuncallExpression(null, new VarExpression(null,"<"), new VarExpression(null,"x"), 365 364 new IntLiteralExpression(null, BigInt(2))), 366 365 new FunLiteralExpression(null, [], [new ExprStatement(null, new IntLiteralExpression(null, BigInt(1)))]), 367 366 new FunLiteralExpression(null, [], [new ExprStatement(null, new VarExpression(null, "x"))]) 368 367 ))); 369 368 } 370 -*/

Modified polemy/tricks.d from [aa60c34591041ca2] to [e911a30d1f923777].

16 16 auto writer = appender!string(); 17 17 formattedWrite(writer, fmt, params); 18 18 return writer.data; 19 19 } 20 20 21 21 unittest 22 22 { 23 - assert( sprintf!"%s == %d"("1+2", 3) == "1+2 == 3" ); 23 + assert( sprintf!"%s == %d"("1+2", 3) == "1+2 == 3" ); 24 24 assert( sprintf!"%s == %04d"("1+2", 3) == "1+2 == 0003" ); 25 25 } 26 26 27 27 /// Unittest helper that asserts an expression must throw something 28 28 29 29 void assert_throw(ExceptionType, T, string fn=__FILE__, int ln=__LINE__)(lazy T t, string msg="") 30 30 { ................................................................................ 85 85 alias assertOp!(`<`) assert_lt; 86 86 alias assertOp!(`<=`) assert_le; 87 87 alias assertOp!(`>`) assert_gt; 88 88 alias assertOp!(`>=`) assert_ge; 89 89 90 90 unittest 91 91 { 92 - assert_nothrow( assert_eq("foo", "foo") ); 93 - assert_nothrow( assert_ne("foo", "bar") ); 94 - assert_nothrow( assert_lt("bar", "foo") ); 95 - assert_nothrow( assert_le("bar", "foo") ); 96 - assert_nothrow( assert_le("bar", "bar") ); 97 - assert_nothrow( assert_gt("foo", "bar") ); 98 - assert_nothrow( assert_ge("foo", "bar") ); 99 - assert_nothrow( assert_ge("bar", "bar") ); 92 + assert_nothrow( assert_eq(1, 1) ); 93 + assert_nothrow( assert_ne(1, 0) ); 94 + assert_nothrow( assert_lt(0, 1) ); 95 + assert_nothrow( assert_le(0, 1) ); 96 + assert_nothrow( assert_le(0, 0) ); 97 + assert_nothrow( assert_gt(1, 0) ); 98 + assert_nothrow( assert_ge(1, 0) ); 99 + assert_nothrow( assert_ge(0, 0) ); 100 100 101 - assert_throw!AssertError( assert_eq("foo", "bar") ); 102 - assert_throw!AssertError( assert_ne("foo", "foo") ); 103 - assert_throw!AssertError( assert_lt("foo", "foo") ); 104 - assert_throw!AssertError( assert_lt("foo", "bar") ); 105 - assert_throw!AssertError( assert_le("foo", "bar") ); 106 - assert_throw!AssertError( assert_gt("bar", "bar") ); 107 - assert_throw!AssertError( assert_gt("bar", "foo") ); 108 - assert_throw!AssertError( assert_ge("bar", "foo") ); 101 + 
assert_throw!AssertError( assert_eq(1, 0) ); 102 + assert_throw!AssertError( assert_ne(1, 1) ); 103 + assert_throw!AssertError( assert_lt(1, 1) ); 104 + assert_throw!AssertError( assert_lt(1, 0) ); 105 + assert_throw!AssertError( assert_le(1, 0) ); 106 + assert_throw!AssertError( assert_gt(0, 0) ); 107 + assert_throw!AssertError( assert_gt(0, 1) ); 108 + assert_throw!AssertError( assert_ge(0, 1) ); 109 109 110 110 class Temp { bool opEquals(int x){return x/x==x;} } 111 111 assert_throw!AssertError( assert_eq(new Temp, 0) ); 112 112 assert_nothrow ( assert_eq(new Temp, 1) ); 113 113 assert_throw!AssertError( assert_eq(new Temp, 2) ); 114 114 } 115 115