Diff
Not logged in

Differences From Artifact [783ee3b0fe58558e]:

To Artifact [0972f7a454ea8e4f]:


44 44 assert( !__traits(compiles, p.column =222) ); 45 45 } 46 46 47 47 /// Represents a lexer token 48 48 49 49 class Token 50 50 { 51 - /// currently we have three kinds of token 52 - enum Kind { 53 - identifier, /// anything other than others 54 - stringLiteral, /// "string literal" 55 - number /// 42 56 - }; 57 - immutable LexPosition pos; /// position where the token occurred in the source 58 - immutable string str; /// the token string itself 59 - immutable Kind kind; /// which kind of token? 51 + immutable LexPosition pos; /// Position where the token occurred in the source 52 + immutable string str; /// The token string itself 53 + immutable bool quoted; /// Was it a "quoted" token or unquoted? 60 54 61 55 mixin SimpleConstructor; 62 56 mixin SimpleCompare; 63 57 } 64 58 65 59 unittest 66 60 { 67 61 auto p = new immutable(LexPosition)("hello.cpp", 123, 45); 68 - auto t = new Token(p, "class", Token.Kind.identifier); 62 + auto t = new Token(p, "class", false); 63 + auto u = new Token(p, "class", true); 69 64 70 65 assert_eq( t.pos, p ); 71 66 assert_eq( t.str, "class" ); 72 - assert_eq( t, new Token(p, "class", Token.Kind.identifier) ); 73 - assert_lt( t, new Token(p, "struct", Token.Kind.identifier) ); 67 + assert( !t.quoted ); 68 + assert_eq( t, new Token(p, "class", false) ); 69 + assert_lt( t, new Token(p, "struct", false) ); 70 + assert_ne( t, u ); 71 + assert( u.quoted ); 74 72 75 73 assert( !__traits(compiles, new Token) ); 76 74 assert( !__traits(compiles, t.pos=p) ); 77 75 assert( !__traits(compiles, t.str=789) ); 76 + assert( !__traits(compiles, t.quoted=true) ); 78 77 } 79 78 80 79 /// Named Construtor for Lexer 81 80 82 81 Lexer lexerFromFile(T...)( string filename, T rest ) 83 82 { 84 83 return lexerFromString( std.file.readText(filename), filename, rest ); ................................................................................ 113 112 std.exception.enforce(current, "Lexer has already reached the end"); 114 113 current = readNext(); 115 114 } 116 115 117 116 /// Range primitive 118 117 Lexer save() /*@property*/ 119 118 { 120 - return new Lexer(buffer, filename, lineno, column, current); 119 + return new Lexer(this.tupleof); 121 120 } 122 121 123 122 private: // implementation 124 123 125 124 string buffer; 126 125 string filename; 127 126 int lineno; ................................................................................ 227 226 column = 1; 228 227 } 229 228 else 230 229 lit ~= c; 231 230 } 232 231 if( !buffer.empty ) 233 232 readChar(); 234 - return new Token(pos, lit, Token.Kind.stringLiteral); 233 + return new Token(pos, lit, true); 235 234 } 236 235 else 237 236 { 238 237 // normal symbol 239 238 auto pos = currentPosition(); 240 239 auto str = ""~readChar(); 241 - return new Token(pos, str, Token.Kind.identifier); 240 + return new Token(pos, str, false); 242 241 } 243 242 } 244 243 else 245 244 { 246 245 auto pos = currentPosition(); 247 246 int i = 0; 248 247 while( i<buffer.length && !std.ctype.isspace(buffer[i]) && !isSymbol(buffer[i]) ) 249 248 ++i; 250 249 auto str = buffer[0 .. i]; 251 250 buffer = buffer[i .. $]; 252 251 column += i; 253 - bool isNumber = find!(`a<'0' || '9'<a`)(str).empty; 254 - return new Token(pos, str, isNumber ? Token.Kind.number : Token.Kind.identifier); 252 + return new Token(pos, str, false); 255 253 } 256 254 } 257 255 258 256 bool isSymbol(char c) 259 257 { 260 258 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_'); 261 259 } ................................................................................ 269 267 unittest 270 268 { 271 269 assert( std.range.isForwardRange!(Lexer) ); 272 270 } 273 271 274 272 unittest 275 273 { 276 - auto lex = lexerFromString("this is a \t\n pen :-( "); 274 + auto lex = lexerFromString("this is a \t\r\n pen :-( "); 277 275 Token[] ts = std.array.array(lex); 278 276 279 - assert( ts[0].pos.lineno == 1 ); 280 - assert( ts[0].pos.column == 1 ); 281 - assert( ts[0].kind == Token.Kind.identifier ); 282 - assert( ts[0].str == "this" ); 277 + assert_eq( ts[0].pos.lineno, 1 ); 278 + assert_eq( ts[0].pos.column, 1 ); 279 + assert( !ts[0].quoted ); 280 + assert_eq( ts[0].str, "this" ); 281 + 282 + assert_eq( ts[1].pos.lineno, 1 ); 283 + assert_eq( ts[1].pos.column, 6 ); 284 + assert( !ts[1].quoted ); 285 + assert_eq( ts[1].str, "is" ); 283 286 284 - assert( ts[1].pos.lineno == 1 ); 285 - assert( ts[1].pos.column == 6 ); 286 - assert( ts[1].kind == Token.Kind.identifier ); 287 - assert( ts[1].str == "is" ); 287 + assert_eq( ts[2].pos.lineno, 1 ); 288 + assert_eq( ts[2].pos.column, 9 ); 289 + assert( !ts[2].quoted ); 290 + assert_eq( ts[2].str, "a" ); 288 291 289 - assert( ts[2].pos.lineno == 1 ); 290 - assert( ts[2].pos.column == 9 ); 291 - assert( ts[2].kind == Token.Kind.identifier ); 292 - assert( ts[2].str == "a" ); 293 - 294 - assert( ts[3].pos.lineno == 2 ); 295 - assert( ts[3].pos.column == 2 ); 296 - assert( ts[3].kind == Token.Kind.identifier ); 297 - assert( ts[3].str == "pen" ); 292 + assert_eq( ts[3].pos.lineno, 2 ); 293 + assert_eq( ts[3].pos.column, 2 ); 294 + assert( !ts[3].quoted ); 295 + assert_eq( ts[3].str, "pen" ); 298 296 299 297 // consecutive symbols are always separated 300 298 // hence, no "++" or "<<" or ... 301 299 302 - assert( ts[4].pos.lineno == 2 ); 303 - assert( ts[4].pos.column == 6 ); 304 - assert( ts[4].str == ":" ); 300 + assert_eq( ts[4].pos.lineno, 2 ); 301 + assert_eq( ts[4].pos.column, 6 ); 302 + assert_eq( ts[4].str, ":" ); 303 + 304 + assert_eq( ts[5].pos.lineno, 2 ); 305 + assert_eq( ts[5].pos.column, 7 ); 306 + assert_eq( ts[5].str, "-" ); 305 307 306 - assert( ts[5].pos.lineno == 2 ); 307 - assert( ts[5].pos.column == 7 ); 308 - assert( ts[5].str == "-" ); 308 + assert_eq( ts[6].pos.lineno, 2 ); 309 + assert_eq( ts[6].pos.column, 8 ); 310 + assert_eq( ts[6].str, "(" ); 309 311 310 - assert( ts[6].pos.lineno == 2 ); 311 - assert( ts[6].pos.column == 8 ); 312 - assert( ts[6].str == "(" ); 313 - 314 - assert( ts.length == 7 ); 312 + assert_eq( ts.length, 7 ); 315 313 } 316 314 317 315 unittest 318 316 { 319 317 auto lex2 = lexerFromString(" a12\n3a 5 "); 320 - assert( lex2.front.str == "a12" ); 321 - assert( lex2.front.kind == Token.Kind.identifier ); 318 + assert_eq( lex2.front.str, "a12" ); 322 319 lex2.popFront; 323 320 auto lex3 = lex2.save; 324 - assert( lex2.front.str == "3a" ); 325 - assert( lex2.front.kind == Token.Kind.identifier ); 321 + assert_eq( lex2.front.str, "3a" ); 326 322 lex2.popFront; 327 - assert( lex3.front.str == "3a" ); 328 - assert( lex3.front.kind == Token.Kind.identifier ); 329 - assert( lex2.front.str == "5" ); 330 - assert( lex2.front.kind == Token.Kind.number ); 323 + assert_eq( lex3.front.str, "3a" ); 324 + assert_eq( lex2.front.str, "5" ); 331 325 lex2.popFront; 332 326 lex3.popFront; 333 327 assert( lex2.empty ); 334 328 assert( !lex3.empty ); 335 - assert( lex3.front.str == "5" ); 336 - assert( lex3.front.kind == Token.Kind.number ); 329 + assert_eq( lex3.front.str, "5" ); 337 330 } 338 331 339 332 unittest 340 333 { 341 334 //!! be sure to run the unittest on the root of the source directory 342 335 auto lexf = lexerFromFile("polemy/lex.d"); 343 336 lexf = find!`a.str == "module"`(lexf); 344 - assert( lexf.front.str == "module", lexf.front.str ); 345 - assert( lexf.front.pos.filename == "polemy/lex.d" ); 346 - assert( lexf.front.pos.lineno == 7 ); 347 - assert( lexf.front.pos.column == 1 ); 337 + assert_eq( lexf.front.str, "module" ); 338 + assert_eq( lexf.front.pos.filename, "polemy/lex.d" ); 339 + assert_eq( lexf.front.pos.lineno, 7 ); 340 + assert_eq( lexf.front.pos.column, 1 ); 341 + lexf.popFront; 342 + assert_eq( lexf.front.str, "polemy" ); 343 + assert_eq( lexf.front.pos.lineno, 7 ); 344 + assert_eq( lexf.front.pos.column, 8 ); 345 + lexf.popFront; 346 + assert_eq( lexf.front.str, "." ); 348 347 lexf.popFront; 349 - assert( lexf.front.str == "polemy" ); 350 - assert( lexf.front.pos.lineno == 7 ); 351 - assert( lexf.front.pos.column == 8 ); 348 + assert_eq( lexf.front.str, "lex" ); 352 349 lexf.popFront; 353 - assert( lexf.front.str == "." ); 350 + assert_eq( lexf.front.str, ";" ); 354 351 lexf.popFront; 355 - assert( lexf.front.str == "lex" ); 356 - lexf.popFront; 357 - assert( lexf.front.str == ";" ); 358 - lexf.popFront; 359 - assert( lexf.front.str == "import" ); 360 - assert( lexf.front.pos.lineno == 8 ); 361 - assert( lexf.front.pos.column == 1 ); 352 + assert_eq( lexf.front.str, "import" ); 353 + assert_eq( lexf.front.pos.lineno, 8 ); 354 + assert_eq( lexf.front.pos.column, 1 ); 362 355 } 363 356 364 357 unittest 365 358 { 366 - auto lex = lexerFromString(`my # comment should 367 -# hey!! 359 + auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!! 368 360 be ignored. 369 361 hahaha"hihihi""hu\\\"huhu"#123 aa 370 -123 aa "aaa 371 -bbb # 123 372 -eee" 362 +123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee" 373 363 zzz 374 364 `); 375 365 Token[] ts = std.array.array(lex); 376 - assert( ts[0].str == "my" ); 377 - assert( ts[0].pos.lineno == 1 ); 378 - assert( ts[1].str == "be" ); 379 - assert( ts[1].pos.lineno == 3 ); 380 - assert( ts[2].str == "ignored" ); 381 - assert( ts[3].str == "." ); 382 - assert( ts[4].str == "hahaha" ); 383 - assert( ts[4].pos.lineno == 4 ); 384 - assert( ts[4].kind == Token.Kind.identifier ); 385 - assert( ts[5].str == "hihihi" ); 386 - assert( ts[5].pos.lineno == 4 ); 387 - assert( ts[5].kind == Token.Kind.stringLiteral ); 388 - assert( ts[6].str == `hu\"huhu` ); 389 - assert( ts[6].kind == Token.Kind.stringLiteral ); 390 - assert( ts[6].pos.lineno == 4 ); 391 - assert( ts[7].str == "123" ); 392 - assert( ts[7].pos.lineno == 5 ); 393 - assert( ts[7].kind == Token.Kind.number ); 394 - assert( ts[8].str == "aa" ); 395 - assert( ts[9].pos.lineno == 5 ); 396 - assert( ts[9].str == "aaa\nbbb # 123\neee" ); 397 - assert( ts[9].kind == Token.Kind.stringLiteral ); 398 - assert( ts[10].pos.lineno == 8 ); 399 - assert( ts.length == 11 ); 366 + assert_eq( ts[0].str, "my" ); 367 + assert_eq( ts[0].pos.lineno, 1 ); 368 + assert( !ts[0].quoted ); 369 + assert_eq( ts[1].str, "be" ); 370 + assert_eq( ts[1].pos.lineno, 3 ); 371 + assert( !ts[1].quoted ); 372 + assert_eq( ts[2].str, "ignored" ); 373 + assert( !ts[2].quoted ); 374 + assert_eq( ts[3].str, "." ); 375 + assert( !ts[3].quoted ); 376 + assert_eq( ts[4].str, "hahaha" ); 377 + assert_eq( ts[4].pos.lineno, 4 ); 378 + assert( !ts[4].quoted ); 379 + assert_eq( ts[5].str, "hihihi" ); 380 + assert_eq( ts[5].pos.lineno, 4 ); 381 + assert( ts[5].quoted ); 382 + assert_eq( ts[6].str, `hu\"huhu` ); 383 + assert_eq( ts[6].pos.lineno, 4 ); 384 + assert( ts[6].quoted ); 385 + assert_eq( ts[7].str, "123" ); 386 + assert_eq( ts[7].pos.lineno, 5 ); 387 + assert_eq( ts[8].str, "aa" ); 388 + assert_eq( ts[9].pos.lineno, 5 ); 389 + assert_eq( ts[9].str, "aaa\nbbb # 123\neee" ); 390 + assert( ts[9].quoted ); 391 + assert_eq( ts[10].pos.lineno, 8 ); 392 + assert( !ts[10].quoted ); 393 + assert_eq( ts.length, 11 ); 400 394 }