Diff
Not logged in

Differences From Artifact [783ee3b0fe58558e]:

To Artifact [0972f7a454ea8e4f]:


44 assert( !__traits(compiles, p.column =222) ); 44 assert( !__traits(compiles, p.column =222) ); 45 } 45 } 46 46 47 /// Represents a lexer token 47 /// Represents a lexer token 48 48 49 class Token 49 class Token 50 { 50 { 51 /// currently we have three kinds of token < 52 enum Kind { < 53 identifier, /// anything other than others < 54 stringLiteral, /// "string literal" < 55 number /// 42 < 56 }; < 57 immutable LexPosition pos; /// position where the token occurred in the | 51 immutable LexPosition pos; /// Position where the token occurred in t 58 immutable string str; /// the token string itself | 52 immutable string str; /// The token string itself 59 immutable Kind kind; /// which kind of token? < > 53 immutable bool quoted; /// Was it a "quoted" token or unquoted? 60 54 61 mixin SimpleConstructor; 55 mixin SimpleConstructor; 62 mixin SimpleCompare; 56 mixin SimpleCompare; 63 } 57 } 64 58 65 unittest 59 unittest 66 { 60 { 67 auto p = new immutable(LexPosition)("hello.cpp", 123, 45); 61 auto p = new immutable(LexPosition)("hello.cpp", 123, 45); 68 auto t = new Token(p, "class", Token.Kind.identifier); | 62 auto t = new Token(p, "class", false); > 63 auto u = new Token(p, "class", true); 69 64 70 assert_eq( t.pos, p ); 65 assert_eq( t.pos, p ); 71 assert_eq( t.str, "class" ); 66 assert_eq( t.str, "class" ); > 67 assert( !t.quoted ); 72 assert_eq( t, new Token(p, "class", Token.Kind.identifier) ); | 68 assert_eq( t, new Token(p, "class", false) ); 73 assert_lt( t, new Token(p, "struct", Token.Kind.identifier) ); | 69 assert_lt( t, new Token(p, "struct", false) ); > 70 assert_ne( t, u ); > 71 assert( u.quoted ); 74 72 75 assert( !__traits(compiles, new Token) ); 73 assert( !__traits(compiles, new Token) ); 76 assert( !__traits(compiles, t.pos=p) ); 74 assert( !__traits(compiles, t.pos=p) ); 77 assert( !__traits(compiles, t.str=789) ); 75 assert( !__traits(compiles, t.str=789) ); > 76 assert( !__traits(compiles, t.quoted=true) ); 78 } 77 } 79 78 80 /// Named Construtor for Lexer 79 /// Named Construtor for Lexer 81 80 82 Lexer lexerFromFile(T...)( string filename, T rest ) 81 Lexer lexerFromFile(T...)( string filename, T rest ) 83 { 82 { 84 return lexerFromString( std.file.readText(filename), filename, rest ); 83 return lexerFromString( std.file.readText(filename), filename, rest ); ................................................................................................................................................................................ 113 std.exception.enforce(current, "Lexer has already reached the en 112 std.exception.enforce(current, "Lexer has already reached the en 114 current = readNext(); 113 current = readNext(); 115 } 114 } 116 115 117 /// Range primitive 116 /// Range primitive 118 Lexer save() /*@property*/ 117 Lexer save() /*@property*/ 119 { 118 { 120 return new Lexer(buffer, filename, lineno, column, current); | 119 return new Lexer(this.tupleof); 121 } 120 } 122 121 123 private: // implementation 122 private: // implementation 124 123 125 string buffer; 124 string buffer; 126 string filename; 125 string filename; 127 int lineno; 126 int lineno; ................................................................................................................................................................................ 227 column = 1; 226 column = 1; 228 } 227 } 229 else 228 else 230 lit ~= c; 229 lit ~= c; 231 } 230 } 232 if( !buffer.empty ) 231 if( !buffer.empty ) 233 readChar(); 232 readChar(); 234 return new Token(pos, lit, Token.Kind.stringLite | 233 return new Token(pos, lit, true); 235 } 234 } 236 else 235 else 237 { 236 { 238 // normal symbol 237 // normal symbol 239 auto pos = currentPosition(); 238 auto pos = currentPosition(); 240 auto str = ""~readChar(); 239 auto str = ""~readChar(); 241 return new Token(pos, str, Token.Kind.identifier | 240 return new Token(pos, str, false); 242 } 241 } 243 } 242 } 244 else 243 else 245 { 244 { 246 auto pos = currentPosition(); 245 auto pos = currentPosition(); 247 int i = 0; 246 int i = 0; 248 while( i<buffer.length && !std.ctype.isspace(buffer[i]) 247 while( i<buffer.length && !std.ctype.isspace(buffer[i]) 249 ++i; 248 ++i; 250 auto str = buffer[0 .. i]; 249 auto str = buffer[0 .. i]; 251 buffer = buffer[i .. $]; 250 buffer = buffer[i .. $]; 252 column += i; 251 column += i; 253 bool isNumber = find!(`a<'0' || '9'<a`)(str).empty; < 254 return new Token(pos, str, isNumber ? Token.Kind.number | 252 return new Token(pos, str, false); 255 } 253 } 256 } 254 } 257 255 258 bool isSymbol(char c) 256 bool isSymbol(char c) 259 { 257 { 260 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_'); 258 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_'); 261 } 259 } ................................................................................................................................................................................ 269 unittest 267 unittest 270 { 268 { 271 assert( std.range.isForwardRange!(Lexer) ); 269 assert( std.range.isForwardRange!(Lexer) ); 272 } 270 } 273 271 274 unittest 272 unittest 275 { 273 { 276 auto lex = lexerFromString("this is a \t\n pen :-( "); | 274 auto lex = lexerFromString("this is a \t\r\n pen :-( "); 277 Token[] ts = std.array.array(lex); 275 Token[] ts = std.array.array(lex); 278 276 279 assert( ts[0].pos.lineno == 1 ); | 277 assert_eq( ts[0].pos.lineno, 1 ); 280 assert( ts[0].pos.column == 1 ); | 278 assert_eq( ts[0].pos.column, 1 ); 281 assert( ts[0].kind == Token.Kind.identifier ); | 279 assert( !ts[0].quoted ); 282 assert( ts[0].str == "this" ); | 280 assert_eq( ts[0].str, "this" ); > 281 > 282 assert_eq( ts[1].pos.lineno, 1 ); > 283 assert_eq( ts[1].pos.column, 6 ); > 284 assert( !ts[1].quoted ); > 285 assert_eq( ts[1].str, "is" ); 283 286 284 assert( ts[1].pos.lineno == 1 ); | 287 assert_eq( ts[2].pos.lineno, 1 ); 285 assert( ts[1].pos.column == 6 ); | 288 assert_eq( ts[2].pos.column, 9 ); 286 assert( ts[1].kind == Token.Kind.identifier ); | 289 assert( !ts[2].quoted ); 287 assert( ts[1].str == "is" ); | 290 assert_eq( ts[2].str, "a" ); 288 291 289 assert( ts[2].pos.lineno == 1 ); < 290 assert( ts[2].pos.column == 9 ); < 291 assert( ts[2].kind == Token.Kind.identifier ); < 292 assert( ts[2].str == "a" ); < 293 < 294 assert( ts[3].pos.lineno == 2 ); | 292 assert_eq( ts[3].pos.lineno, 2 ); 295 assert( ts[3].pos.column == 2 ); | 293 assert_eq( ts[3].pos.column, 2 ); 296 assert( ts[3].kind == Token.Kind.identifier ); < > 294 assert( !ts[3].quoted ); 297 assert( ts[3].str == "pen" ); | 295 assert_eq( ts[3].str, "pen" ); 298 296 299 // consecutive symbols are always separated 297 // consecutive symbols are always separated 300 // hence, no "++" or "<<" or ... 298 // hence, no "++" or "<<" or ... 301 299 302 assert( ts[4].pos.lineno == 2 ); | 300 assert_eq( ts[4].pos.lineno, 2 ); 303 assert( ts[4].pos.column == 6 ); | 301 assert_eq( ts[4].pos.column, 6 ); 304 assert( ts[4].str == ":" ); | 302 assert_eq( ts[4].str, ":" ); > 303 > 304 assert_eq( ts[5].pos.lineno, 2 ); > 305 assert_eq( ts[5].pos.column, 7 ); > 306 assert_eq( ts[5].str, "-" ); 305 307 306 assert( ts[5].pos.lineno == 2 ); | 308 assert_eq( ts[6].pos.lineno, 2 ); 307 assert( ts[5].pos.column == 7 ); | 309 assert_eq( ts[6].pos.column, 8 ); 308 assert( ts[5].str == "-" ); | 310 assert_eq( ts[6].str, "(" ); 309 311 310 assert( ts[6].pos.lineno == 2 ); < 311 assert( ts[6].pos.column == 8 ); < 312 assert( ts[6].str == "(" ); < 313 < 314 assert( ts.length == 7 ); | 312 assert_eq( ts.length, 7 ); 315 } 313 } 316 314 317 unittest 315 unittest 318 { 316 { 319 auto lex2 = lexerFromString(" a12\n3a 5 "); 317 auto lex2 = lexerFromString(" a12\n3a 5 "); 320 assert( lex2.front.str == "a12" ); | 318 assert_eq( lex2.front.str, "a12" ); 321 assert( lex2.front.kind == Token.Kind.identifier ); < 322 lex2.popFront; 319 lex2.popFront; 323 auto lex3 = lex2.save; 320 auto lex3 = lex2.save; 324 assert( lex2.front.str == "3a" ); | 321 assert_eq( lex2.front.str, "3a" ); 325 assert( lex2.front.kind == Token.Kind.identifier ); < 326 lex2.popFront; 322 lex2.popFront; 327 assert( lex3.front.str == "3a" ); | 323 assert_eq( lex3.front.str, "3a" ); 328 assert( lex3.front.kind == Token.Kind.identifier ); < 329 assert( lex2.front.str == "5" ); | 324 assert_eq( lex2.front.str, "5" ); 330 assert( lex2.front.kind == Token.Kind.number ); < 331 lex2.popFront; 325 lex2.popFront; 332 lex3.popFront; 326 lex3.popFront; 333 assert( lex2.empty ); 327 assert( lex2.empty ); 334 assert( !lex3.empty ); 328 assert( !lex3.empty ); 335 assert( lex3.front.str == "5" ); | 329 assert_eq( lex3.front.str, "5" ); 336 assert( lex3.front.kind == Token.Kind.number ); < 337 } 330 } 338 331 339 unittest 332 unittest 340 { 333 { 341 //!! be sure to run the unittest on the root of the source directory 334 //!! be sure to run the unittest on the root of the source directory 342 auto lexf = lexerFromFile("polemy/lex.d"); 335 auto lexf = lexerFromFile("polemy/lex.d"); 343 lexf = find!`a.str == "module"`(lexf); 336 lexf = find!`a.str == "module"`(lexf); 344 assert( lexf.front.str == "module", lexf.front.str ); | 337 assert_eq( lexf.front.str, "module" ); 345 assert( lexf.front.pos.filename == "polemy/lex.d" ); | 338 assert_eq( lexf.front.pos.filename, "polemy/lex.d" ); 346 assert( lexf.front.pos.lineno == 7 ); | 339 assert_eq( lexf.front.pos.lineno, 7 ); 347 assert( lexf.front.pos.column == 1 ); | 340 assert_eq( lexf.front.pos.column, 1 ); > 341 lexf.popFront; > 342 assert_eq( lexf.front.str, "polemy" ); > 343 assert_eq( lexf.front.pos.lineno, 7 ); > 344 assert_eq( lexf.front.pos.column, 8 ); > 345 lexf.popFront; > 346 assert_eq( lexf.front.str, "." ); 348 lexf.popFront; 347 lexf.popFront; 349 assert( lexf.front.str == "polemy" ); | 348 assert_eq( lexf.front.str, "lex" ); 350 assert( lexf.front.pos.lineno == 7 ); < 351 assert( lexf.front.pos.column == 8 ); < 352 lexf.popFront; 349 lexf.popFront; 353 assert( lexf.front.str == "." ); | 350 assert_eq( lexf.front.str, ";" ); 354 lexf.popFront; 351 lexf.popFront; 355 assert( lexf.front.str == "lex" ); < 356 lexf.popFront; < 357 assert( lexf.front.str == ";" ); | 352 assert_eq( lexf.front.str, "import" ); 358 lexf.popFront; < 359 assert( lexf.front.str == "import" ); < 360 assert( lexf.front.pos.lineno == 8 ); | 353 assert_eq( lexf.front.pos.lineno, 8 ); 361 assert( lexf.front.pos.column == 1 ); | 354 assert_eq( lexf.front.pos.column, 1 ); 362 } 355 } 363 356 364 unittest 357 unittest 365 { 358 { 366 auto lex = lexerFromString(`my # comment should | 359 auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!! 367 # hey!! < 368 be ignored. 360 be ignored. 369 hahaha"hihihi""hu\\\"huhu"#123 aa 361 hahaha"hihihi""hu\\\"huhu"#123 aa 370 123 aa "aaa | 362 123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee" 371 bbb # 123 < 372 eee" < 373 zzz 363 zzz 374 `); 364 `); 375 Token[] ts = std.array.array(lex); 365 Token[] ts = std.array.array(lex); 376 assert( ts[0].str == "my" ); | 366 assert_eq( ts[0].str, "my" ); 377 assert( ts[0].pos.lineno == 1 ); | 367 assert_eq( ts[0].pos.lineno, 1 ); > 368 assert( !ts[0].quoted ); 378 assert( ts[1].str == "be" ); | 369 assert_eq( ts[1].str, "be" ); 379 assert( ts[1].pos.lineno == 3 ); | 370 assert_eq( ts[1].pos.lineno, 3 ); > 371 assert( !ts[1].quoted ); 380 assert( ts[2].str == "ignored" ); | 372 assert_eq( ts[2].str, "ignored" ); > 373 assert( !ts[2].quoted ); 381 assert( ts[3].str == "." ); | 374 assert_eq( ts[3].str, "." ); > 375 assert( !ts[3].quoted ); 382 assert( ts[4].str == "hahaha" ); | 376 assert_eq( ts[4].str, "hahaha" ); 383 assert( ts[4].pos.lineno == 4 ); | 377 assert_eq( ts[4].pos.lineno, 4 ); 384 assert( ts[4].kind == Token.Kind.identifier ); < > 378 assert( !ts[4].quoted ); 385 assert( ts[5].str == "hihihi" ); | 379 assert_eq( ts[5].str, "hihihi" ); 386 assert( ts[5].pos.lineno == 4 ); | 380 assert_eq( ts[5].pos.lineno, 4 ); 387 assert( ts[5].kind == Token.Kind.stringLiteral ); < > 381 assert( ts[5].quoted ); 388 assert( ts[6].str == `hu\"huhu` ); | 382 assert_eq( ts[6].str, `hu\"huhu` ); 389 assert( ts[6].kind == Token.Kind.stringLiteral ); < 390 assert( ts[6].pos.lineno == 4 ); | 383 assert_eq( ts[6].pos.lineno, 4 ); > 384 assert( ts[6].quoted ); 391 assert( ts[7].str == "123" ); | 385 assert_eq( ts[7].str, "123" ); 392 assert( ts[7].pos.lineno == 5 ); | 386 assert_eq( ts[7].pos.lineno, 5 ); 393 assert( ts[7].kind == Token.Kind.number ); < 394 assert( ts[8].str == "aa" ); | 387 assert_eq( ts[8].str, "aa" ); 395 assert( ts[9].pos.lineno == 5 ); | 388 assert_eq( ts[9].pos.lineno, 5 ); 396 assert( ts[9].str == "aaa\nbbb # 123\neee" ); | 389 assert_eq( ts[9].str, "aaa\nbbb # 123\neee" ); 397 assert( ts[9].kind == Token.Kind.stringLiteral ); < > 390 assert( ts[9].quoted ); 398 assert( ts[10].pos.lineno == 8 ); | 391 assert_eq( ts[10].pos.lineno, 8 ); > 392 assert( !ts[10].quoted ); 399 assert( ts.length == 11 ); | 393 assert_eq( ts.length, 11 ); 400 } 394 }