Changeset 156
- Timestamp:
- 05/21/06 18:16:04 (2 years ago)
- Files:
-
- trunk/orca/ChangeLog (modified) (1 diff)
- trunk/orca/parse.c (modified) (27 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/orca/ChangeLog
r71 | r156 |
  2 |    2 |
  3 |    3 |
  4 |      | - V0.0.24 - ?? March 2006
  5 |      | -
    |    4 | + V0.0.24 - 21 May 2006
    |    5 | +
    |    6 | + * Improved parse.
  6 |    7 |   * Added make-dir.
  7 |    8 |   * Bugfixes.
trunk/orca/parse.c
r154 r156 21 21 #include "os.h" 22 22 #include "ovalue.h" 23 #include "charset.h"24 23 #include "orca_atoms.h" 24 #include "internal.h" 25 26 27 #define PARSE_EX 28 #define orChar orInt 29 #define UChar char 30 #define OR_ITER_BLOCK(ita,itb,blk,scell) \ 31 ita = blk->values + scell->series.it; \ 32 itb = blk->values + blk->used; 33 25 34 26 35 … … 29 38 extern int orFindString( const OString* strA, int iA, 30 39 const OString* strB, int iB ); 40 41 42 typedef struct 43 { 44 uint8_t type; 45 uint8_t flags; 46 uint8_t datatype; 47 uint8_t _pad; 48 OIndex n; /* Series number */ 49 OIndex it; /* Element Iterator */ 50 OIndex end; /* Slice end */ 51 } 52 UCellSlice; 53 54 55 typedef struct 56 { 57 //UThread* thread; 58 OValue* rules; 59 UCellSlice input; 60 OString* str; 61 int error; 62 int matchCase; 63 } 64 StringParser; 65 66 67 typedef struct 68 { 69 //UThread* thread; 70 OValue* rules; 71 UCellSlice input; 72 OString* blk; 73 int error; 74 } 75 BlockParser; 31 76 32 77 … … 48 93 49 94 50 static const uint8_t* thruChars( const OValue* bitsetVal, 51 const uint8_t* it, 52 const uint8_t* end ) 95 static int _repeatChar( OString* input, OIndex pos, int limit, int c ) 53 96 { 97 UChar* start; 98 UChar* it; 99 UChar* end; 100 101 it = input->charArray; 102 end = it + input->used; 103 it += pos; 104 if( end > (it + limit) ) 105 end = it + limit; 106 107 start = it; 108 while( it != end ) 109 { 110 if( *it != c ) 111 break; 112 ++it; 113 } 114 return it - start; 115 } 116 117 118 static int _repeatBitset( OString* input, OIndex pos, int limit, 119 const OValue* patc ) 120 { 121 UChar* start; 122 UChar* it; 123 UChar* end; 54 124 int c; 55 OBinary* bin = orSTRING( bitsetVal);125 OBinary* bin = orSTRING(patc); 56 126 const uint8_t* bits = bin->byteArray; 57 127 int maxC = bin->used * 8; 58 128 129 it = input->charArray; 130 end = it + input->used; 131 it += pos; 132 if( end > (it + limit) ) 133 end = it + limit; 134 135 start = it; 59 136 while( it != end ) 60 137 { … … 66 
143 ++it; 67 144 } 68 return it ;145 return it - start; 69 146 } 70 147 … … 73 150 Returns zero if matching rule not found. 74 151 */ 75 static const OValue* evalParseStr( const OValue* rit, const OValue* rend,76 OIndex strN, int* spos )152 static const OValue* _parseStr( StringParser* pe, const OValue* rit, 153 const OValue* rend, OIndex* spos ) 77 154 { 78 OString* str; 79 int atom; 80 int pos = *spos; 155 const OBlock* cblk; 156 const OValue* tval; 157 int32_t repMin; 158 int32_t repMax; 159 OString* istr; 160 OIndex pos = *spos; 161 162 istr = pe->str; 81 163 82 164 match: … … 87 169 { 88 170 case OT_WORD: 89 atom = orAtom(rit); 90 91 if( atom == OR_ATOM_OPT ) 92 { 93 const OValue* r2 = rit + 1; 94 95 if( r2 == rend ) 171 switch( orAtom(rit) ) 172 { 173 case OR_ATOM_OPT: 174 ++rit; 175 repMin = 0; 176 repMax = 1; 177 goto repeat; 178 179 case OR_ATOM_ANY: 180 ++rit; 181 repMin = 0; 182 repMax = 0x7fffffff; 183 goto repeat; 184 185 case OR_ATOM_SOME: 186 ++rit; 187 repMin = 1; 188 repMax = 0x7fffffff; 189 goto repeat; 190 191 case OR_ATOM_BREAK: 192 return rit; 193 194 case OR_ATOM_BAR: 195 goto complete; 196 197 case OR_ATOM_TO: 198 case OR_ATOM_THRU: 199 { 200 OAtom ratom = orAtom(rit); 201 202 ++rit; 203 if( rit == rend ) 96 204 return 0; 97 205 98 evalParseStr( r2, r2 + 1, strN, &pos ); 99 if( orErrorThrown ) 100 return 0; 101 rit += 2; 102 } 103 else if( atom == OR_ATOM_ANY ) 104 { 105 //until [not eval-pstr second it] 106 //it: skip it 2 107 108 const OValue* r2 = rit + 1; 109 110 if( r2 == rend ) 111 return 0; 112 113 if( orIs(r2, OT_WORD) ) 206 if( orIs(rit, OT_WORD) ) 114 207 { 115 OBlock* ctxBlk; 116 orWordVal( r2, ctxBlk, r2 ); 117 } 118 119 if( orIs(r2, OT_BITSET) ) 120 { 121 const uint8_t* it; 122 const uint8_t* end; 123 124 str = orStringPtr(strN); 125 it = str->byteArray + pos; 126 end = str->byteArray + str->used; 127 128 end = thruChars( r2, it, end ); 129 if( end != it ) 130 pos = end - str->byteArray; 131 rit += 2; 208 orWordVal( rit, cblk, 
tval ); 132 209 } 133 210 else 134 211 { 135 orError( "parse any expected bitset" ); 212 tval = rit; 213 } 214 215 switch( orType(tval) ) 216 { 217 case OT_CHAR: 218 { 219 int c = orChar(tval); 220 UChar* cp = istr->charArray + pos; 221 UChar* end = istr->charArray + pe->input.end; 222 while( cp != end ) 223 { 224 if( *cp == c ) 225 break; 226 ++cp; 227 } 228 if( cp == end ) 229 goto failed; 230 pos = cp - istr->charArray; 231 if( ratom == OR_ATOM_THRU ) 232 ++pos; 233 } 234 break; 235 236 case OT_STRING: 237 { 238 OString* pat = orSTRING(tval); 239 pos = orFindString( istr, pos, 240 pat, tval->series.it ); 241 if( pos < 0 ) 242 goto failed; 243 if( ratom == OR_ATOM_THRU ) 244 pos += pat->used - tval->series.it; 245 } 246 break; 247 248 case OT_BITSET: 249 { 250 OBinary* bin = orSTRING(tval); 251 const uint8_t* bits = bin->byteArray; 252 int maxC = bin->used * 8; 253 UChar* cp = istr->charArray + pos; 254 UChar* end = istr->charArray + pe->input.end; 255 int c; 256 while( cp != end ) 257 { 258 c = *cp; 259 if( c < maxC ) 260 { 261 if( orBitIsSet( bits, c ) ) 262 break; 263 } 264 ++cp; 265 } 266 if( cp == end ) 267 goto failed; 268 pos = cp - istr->charArray; 269 if( ratom == OR_ATOM_THRU ) 270 ++pos; 271 } 272 break; 273 274 case OT_BLOCK: 275 // TODO 276 orError( PARSE_EX 277 "to/thru block! not implemented" ); 278 pe->error = 1; 279 return 0; 280 } 281 ++rit; 282 } 283 break; 284 285 case OR_ATOM_SKIP: 286 // TODO - int! skip 287 //if( pos >= istr->used ) 288 // return 0; 289 ++rit; 290 ++pos; 291 break; 292 293 //case OR_ATOM_COPY: 294 295 default: 296 orWordVal( rit, cblk, tval ); 297 298 if( orIs(tval, OT_CHAR) ) 299 goto match_char; 300 else if( orIs(tval, OT_STRING) ) 301 goto match_string; 302 else if( orIs(tval, OT_BLOCK) ) 303 goto match_block; 304 else if( orIs(tval, OT_BITSET) ) 305 goto match_bitset; 306 else 307 { 308 orError( PARSE_EX 309 "parse expected char!/block!/bitset!" 
); 310 pe->error = 1; 136 311 return 0; 137 312 } 138 } 139 else if( atom == OR_ATOM_SOME ) 140 { 141 const OValue* found; 142 const OValue* r2end; 143 const OValue* r2 = rit + 1; 144 145 if( r2 == rend ) 146 return 0; 147 148 if( orIs(r2, OT_WORD) ) 149 { 150 OBlock* ctxBlk; 151 orWordVal( r2, ctxBlk, r2 ); 152 } 153 154 if( orIs(r2, OT_BLOCK) ) 155 { 156 OBlock* blk2; 157 158 blk2 = orBLOCK(r2); 159 r2 = blk2->values + r2->series.it; 160 r2end = blk2->values + blk2->used; 161 162 found = evalParseStr( r2, r2end, strN, &pos ); 163 if( found ) 164 { 165 do 166 { 167 found = evalParseStr( r2, r2end, strN, &pos ); 168 if( orErrorThrown ) 169 return 0; 170 } 171 while( found ); 172 rit += 2; 173 } 174 else 175 { 176 goto failed_eval; 177 } 178 } 179 else if( orIs(r2, OT_BITSET) ) 180 { 181 const uint8_t* it; 182 const uint8_t* end; 183 184 str = orStringPtr(strN); 185 it = str->byteArray + pos; 186 end = str->byteArray + str->used; 187 188 end = thruChars( r2, it, end ); 189 if( end == it ) 190 { 191 rit = nextRule( rit, rend ); 192 if( ! rit ) 193 return 0; 194 } 195 else 196 { 197 pos = end - str->byteArray; 198 rit += 2; 199 } 200 } 201 else 202 { 203 orError( "parse some expected block or bitset" ); 204 return 0; 205 } 206 } 207 else if( atom == OR_ATOM_BREAK ) 208 { 209 return rit; 210 } 211 else if( atom == OR_ATOM_BAR ) 212 { 213 goto complete; 214 } 215 else if( atom == OR_ATOM_TO ) 216 { 217 int si; 218 const OValue* r2 = rit + 1; 219 220 if( r2 == rend ) 221 return 0; 222 223 if( ! 
orIsString( orType(r2) ) ) 224 return 0; 225 226 si = orFindString( orStringPtr(strN), pos, 227 orSTRING(r2), r2->series.it ); 228 if( si > -1 ) 229 { 230 pos = si; 231 rit += 2; 232 } 233 else 234 { 235 goto failed; 236 } 237 } 238 else if( atom == OR_ATOM_SKIP ) 239 { 240 //if( pos >= str->used ) 241 // return 0; 242 ++rit; 243 ++pos; 244 } 245 /* 246 else if( atom == OR_ATOM_THRU ) 247 { 248 } 249 else if( atom == OR_ATOM_SET ) 250 { 251 } 252 else if( atom == OR_ATOM_COPY ) 253 { 254 } 255 */ 256 else 257 { 258 const OValue* wval; 259 const OBlock* cblk; 260 261 orWordVal( rit, cblk, wval ); 262 263 if( orIs(wval, OT_BLOCK) ) 264 { 265 // Same as case OT_BLOCK below. 266 cblk = orBLOCK( wval ); 267 wval = evalParseStr( cblk->values + wval->series.it, 268 cblk->values + cblk->used, 269 strN, &pos ); 270 if( wval ) 271 ++rit; 272 else 273 goto failed_eval; 274 } 275 else if( orIs(wval, OT_BITSET) ) 276 { 277 OString* bin = orSTRING( wval ); 278 int c; 279 280 str = orStringPtr(strN); 281 c = str->charArray[ pos ]; 282 if( orBitIsSet( bin->byteArray, c ) ) 283 { 284 ++rit; 285 ++pos; 286 } 287 else 288 goto failed; 289 } 290 else 291 { 292 orError( "parse expected block or bitset" ); 293 return 0; 294 } 313 break; 295 314 } 296 315 break; … … 298 317 case OT_SETWORD: 299 318 { 300 OValue* wval; 301 OBlock* ctxBlk; 302 303 orWordVal( rit, ctxBlk, wval ); 304 305 orSetTF( wval, OT_STRING ); 306 orSetSeries( wval, strN, pos ); 307 319 OValue* cell; 320 orWordVal( rit, cblk, cell ); 308 321 ++rit; 322 323 orSetTF( cell, OT_STRING ); 324 orSetSeries( cell, pe->input.n, pos ); 309 325 } 310 326 break; 311 327 #if 0 328 case OT_GETWORD: 329 { 330 OValue* cell; 331 orWordVal( rit, cblk, cell ); 332 ++rit; 333 334 if( orIs(cell, OT_SLICE) && (cell->series.n == pe->input.n) ) 335 cell->slice.end = pos; 336 } 337 break; 338 #endif 312 339 case OT_INTEGER: 313 {314 const OValue* r2 = rit + 1; 315 316 if( r 2== rend )340 repMin = orInt(rit); 341 342 ++rit; 343 if( rit == rend 
) 317 344 return 0; 318 345 319 if( orIs(r2, OT_INTEGER) ) 320 { 321 int count = 0; 322 int maxCount = orInt(r2); 323 324 ++r2; 325 if( r2 == rend ) 326 return 0; 327 328 while( 1 ) 329 { 330 if( ! evalParseStr( r2, r2 + 1, strN, &pos ) ) 331 break; 332 ++count; 333 } 334 335 if( orErrorThrown ) 336 return 0; 337 if( (count >= orInt(rit)) && (count <= maxCount) ) 338 rit += 3; 339 else 340 goto failed; 341 } 342 } 343 break; 346 if( orIs(rit, OT_INTEGER) ) 347 { 348 repMax = orInt(rit); 349 ++rit; 350 } 351 else 352 { 353 repMax = repMin; 354 } 355 goto repeat; 344 356 345 357 case OT_CHAR: 346 str = orStringPtr(strN); 347 if( str->charArray[ pos ] == orInt(rit) ) 358 tval = rit; 359 match_char: 360 if( istr->charArray[ pos ] == orChar(tval) ) 348 361 { 349 362 ++rit; … … 355 368 356 369 case OT_BLOCK: 370 tval = rit; 371 match_block: 372 cblk = orBLOCK( tval ); 373 tval = _parseStr( pe, cblk->values + tval->series.it, 374 cblk->values + cblk->used, &pos ); 375 istr = pe->str; 376 if( ! tval ) 377 goto failed_eval; 378 ++rit; 379 break; 380 381 case OT_PAREN: 382 orEvalBlock( orBlockPtr(rit->series.n), rit->series.it ); 383 if( orErrorThrown ) 384 { 385 pe->error = 1; 386 return 0; 387 } 388 389 /* Re-aquire pointer & check if input modified. 
*/ 390 istr = pe->str = orStringPtr( pe->input.n ); 391 if( istr->used < pe->input.end ) 392 pe->input.end = istr->used; 393 394 ++rit; 395 break; 396 397 case OT_STRING: 398 tval = rit; 399 match_string: 357 400 { 358 const OValue* found; 359 const OBlock* cblk; 360 361 cblk = orBLOCK( rit ); 362 found = evalParseStr( cblk->values + rit->series.it, 363 cblk->values + cblk->used, 364 strN, &pos ); 365 if( found ) 366 ++rit; 367 else 368 goto failed_eval; 369 } 370 break; 371 372 case OT_PAREN: 373 { 374 orEvalBlock( orBLOCK(rit), rit->series.it ); 375 if( orErrorThrown ) 376 return 0; 377 ++rit; 378 } 379 break; 380 381 case OT_STRING: 382 { 383 OString* mstr = orSTRING(rit); 384 385 str = orStringPtr(strN); 401 OString* pat = orSTRING( tval ); 402 386 403 /* 387 if( useCase )388 pos = orMatchStringCase( str, pos, mstr, rit->series.it);404 if( pe->matchCase ) 405 pos = orMatchStringCase(istr, pos, pat, tval->series.it); 389 406 else 390 407 */ 391 pos = orMatchString( str, pos, mstr, rit->series.it );408 pos = orMatchString( istr, pos, pat, tval->series.it ); 392 409 393 410 if( pos ) … … 398 415 break; 399 416 417 case OT_BITSET: 418 tval = rit; 419 match_bitset: 420 { 421 OString* bin = orSTRING( tval ); 422 int c; 423 424 c = istr->charArray[ pos ]; 425 if( orBitIsSet( bin->byteArray, c ) ) 426 { 427 ++rit; 428 ++pos; 429 } 430 else 431 goto failed; 432 } 433 break; 434 400 435 default: 401 orError( "invalid parse value %s", 402 orDatatypeName( orType(rit) ) ); 436 orError( PARSE_EX "invalid parse value" ); 437 //orDatatypeName( orType(rit) ) ); 438 pe->error = 1; 403 439 return 0; 404 440 } … … 410 446 return rit; 411 447 448 repeat: 449 450 /* Repeat rit for repMin to repMax times. 
*/ 451 452 if( rit == rend ) 453 { 454 orError( PARSE_EX "Enexpected end of parse rule" ); 455 pe->error = 1; 456 return 0; 457 } 458 else 459 { 460 int count; 461 462 if( orIs(rit, OT_WORD) ) 463 { 464 orWordVal( rit, cblk, tval ); 465 } 466 else 467 { 468 tval = rit; 469 } 470 471 switch( orType(tval) ) 472 { 473 case OT_CHAR: 474 count = _repeatChar( istr, pos, repMax, orChar(tval) ); 475 pos += count; 476 break; 477 478 case OT_STRING: 479 { 480 OString* pat; 481 int p2; 482 483 count = 0; 484 pat = orSTRING(tval); 485 486 while( count < repMax ) 487 { 488 p2 = orMatchString( istr, pos, pat, tval->series.it ); 489 if( ! p2 ) 490 break; 491 pos = p2; 492 ++count; 493 } 494 } 495 break; 496 497 case OT_BITSET: 498 count = _repeatBitset( istr, pos, repMax, tval ); 499 pos += count; 500 break; 501 502 case OT_BLOCK: 503 { 504 OValue* ci; 505 OValue* ce; 506 507 count = 0; 508 cblk = orBLOCK( tval ); 509 OR_ITER_BLOCK( ci, ce, cblk, tval ); 510 511 while( count < repMax ) 512 { 513 if( pos == pe->input.end ) 514 break; 515 if( ! _parseStr( pe, ci, ce, &pos ) ) 516 break; 517 ++count; 518 } 519 if( pe->error ) 520 return 0; 521 istr = pe->str; 522 } 523 break; 524 525 default: 526 orError( PARSE_EX "Invalid parse rule" ); 527 pe->error = 1; 528 return 0; 529 } 530 531 if( count < repMin ) 532 goto failed; 533 ++rit; 534 } 535 goto match; 536 412 537 failed_eval: 413 538 414 if( orErrorThrown)539 if( pe->error ) 415 540 return 0; 416 541 … … 427 552 428 553 429 /* 430 a1 points to string OValue. 431 rules points to rule block OValue. 
432 */ 433 static void parseStringRules( OValue* a1, OValue* rules, int all, 434 int useCase ) 554 #if 0 555 static const char* _parseStrFailedMessage( StringParser* pe, 556 const UChar* it, const UChar* end ) 435 557 { 436 OBlock* rblk = orBLOCK(rules); 437 const OValue* rit = rblk->values + rules->series.it; 438 const OValue* rend = rblk->values + rblk->used; 439 OIndex strN = a1->series.n; 440 int si = a1->series.it; 441 int lresult = 0; 442 443 444 (void) all; 445 (void) useCase; 446 447 orRefAvailErr( 2 ); 448 orRefPush( OT_BLOCK, rules->series.n ); 449 orRefPush( OT_STRING, strN ); 450 451 rit = evalParseStr( rit, rend, strN, &si ); 452 453 orRefPop( 2 ); 454 455 if( rit ) 456 { 457 OString* str = orStringPtr(strN); 458 if( si >= str->used ) 459 lresult = 1; 460 } 461 else if( orErrorThrown ) 462 { 463 return; 464 } 465 orResult( OT_LOGIC, lresult ); 558 UChar* dest; 559 OString* str = orStringPtr( pe->thread->callTempBinN ); 560 str->used = 0; 561 ur_strCat( str, "parse failed at \"", 17 ); 562 563 orArrayReserve( str, sizeof(UChar), str->used + (end - it) + 2 ); 564 dest = str->charArray + str->used; 565 while( it != end ) 566 { 567 if( *it == '\n' ) 568 break; 569 *dest++ = *it++; 570 //++str->used; 571 } 572 *dest++ = '"'; 573 *dest = '\0'; 574 575 return str->charArray; 466 576 } 467 468 469 static void parseString( OValue* a1, OValue* rules, int all, int useCase ) 577 #endif 578 579 580 static void _parseStringS( OValue* a1, OValue* rules, int all, int useCase ) 470 581 { 471 582 OBinary* custom; … … 476 587 OBlock* rblk; 477 588 OIndex rblkN; 589 OIndex rblkHold; 590 OIndex customHold; 478 591 OString* str = orSTRING(a1); 479 592 480 593 (void) useCase; 481 482 orRefAvailErr( 2 );483 orRefPush( OT_STRING, a1->index );484 594 485 595 rblk = orMakeBlock( 0 ); 486 596 rblkN = orBlockN( rblk ); 487 488 orRefPush( OT_BLOCK, rblkN ); 597 rblkHold = orHold( OT_BLOCK, rblkN ); 489 598 490 599 if( orIs(rules, OT_STRING) ) 491 600 { 492 601 custom = 
orMakeCharset( orSTRING(rules), rules->series.it ); 602 customHold = orHold( OT_BINARY, orStringN(custom) ); 493 603 delim = custom->byteArray; 494 604 if( ! all ) … … 501 611 } 502 612 } 503 orRefPush( OT_STRING, orStringN( custom ) );504 613 } 505 614 else … … 540 649 if( custom ) 541 650 { 542 orRe fPop( 1);651 orRelease( customHold ); 543 652 orArrayFree( custom ); // Garbage collection will free this later. 544 653 } … … 548 657 } 549 658 550 orRe fPop( 2);659 orRelease( rblkHold ); 551 660 orResultBLOCK( rblkN ); 552 661 } … … 556 665 557 666 667 #if 0 558 668 /** 559 Returns index in blk Vwhere fval is found or -1 if fval is not found.669 Returns index in blk where fval is found or -1 if fval is not found. 560 670 */ 561 671 static int _findBlock( const OBlock* blk, OIndex pos, const OValue* fval ) 562 672 { 563 OValue* it = blk->values + pos;564 OValue* end = blk->values + blk->used;673 const OValue* it = blk->values + pos; 674 const OValue* end = blk->values + blk->used; 565 675 566 676 // TODO: If fval is block then all values must match. … … 570 680 while( it != end ) 571 681 { 572 if( orEqual(it, fval) )682 if( ur_equal(it, fval) ) 573 683 return it - blk->values; 574 684 ++it; … … 576 686 return -1; 577 687 } 688 #endif 578 689 579 690 … … 581 692 Returns zero if matching rule not found. 
582 693 */ 583 static const OValue* evalParseBlock( const OValue* rit, const OValue* rend,584 OIndex blkN, int* spos )694 static const OValue* _parseBlock( BlockParser* pe, const OValue* rit, 695 const OValue* rend, int* spos ) 585 696 { 586 OBlock* blk; 587 OValue* val; 588 int atom; 589 int pos = *spos; 697 const OBlock* cblk; 698 const OValue* tval; 699 int32_t repMin; 700 int32_t repMax; 701 OAtom atom; 702 OBlock* iblk; 703 OIndex pos = *spos; 704 705 iblk = pe->blk; 590 706 591 707 match: … … 601 717 { 602 718 // Datatype 603 blk = orBlockPtr(blkN); 604 val = blk->values + pos; 605 if( orType(val) != orAtom(rit) ) 719 if( pos >= pe->input.end ) 720 goto failed; 721 tval = iblk->values + pos; 722 if( orType(tval) != atom ) 606 723 { 607 if( orAtom(rit)== OT_NUMBER )724 if( atom == OT_NUMBER ) 608 725
