| 120 | | StringParser; |
| 121 | | |
| 122 | | |
| | 84 | |
| | 85 | |
| | 86 | /* |
| | 87 | Returns zero if matching rule not found. |
| | 88 | */ |
| | 89 | static const UCell* _parseStr( StringParser* pe, const UCell* rit, |
| | 90 | const UCell* rend, UIndex* spos ) |
| | 91 | { |
| | 92 | UString* str; |
| | 93 | UIndex pos = *spos; |
| | 94 | |
| | 95 | match: |
| | 96 | |
| | 97 | while( rit != rend ) |
| | 98 | { |
| | 99 | switch( ur_type(rit) ) |
| | 100 | { |
| | 101 | case UT_WORD: |
| | 102 | switch( ur_atom(rit) ) |
| | 103 | { |
| | 104 | case UR_ATOM_OPT: |
| | 105 | { |
| | 106 | const UCell* r2 = rit + 1; |
| | 107 | |
| | 108 | if( r2 == rend ) |
| | 109 | return 0; |
| | 110 | |
| | 111 | _parseStr( pe, r2, r2 + 1, &pos ); |
| | 112 | if( pe->error ) |
| | 113 | return 0; |
| | 114 | rit += 2; |
| | 115 | } |
| | 116 | break; |
| | 117 | |
| | 118 | case UR_ATOM_ANY: |
| | 119 | { |
| | 120 | //until [not eval-pstr second it] |
| | 121 | //it: skip it 2 |
| | 122 | |
| | 123 | const UCell* r2 = rit + 1; |
| | 124 | |
| | 125 | if( r2 == rend ) |
| | 126 | return 0; |
| | 127 | |
| | 128 | if( ur_is(r2, UT_WORD) ) |
| | 129 | { |
| | 130 | UBlock* ctxBlk; |
| | 131 | ur_wordCell( r2, ctxBlk, r2 ); |
| | 132 | } |
| | 133 | |
| | 134 | if( ur_is(r2, UT_BITSET) ) |
| | 135 | { |
| | 136 | const uint8_t* it; |
| | 137 | const uint8_t* end; |
| | 138 | |
| | 139 | str = ur_binPtr( pe->input.n ); |
| | 140 | it = str->ptr.b + pos; |
| | 141 | end = str->ptr.b + str->used; |
| | 142 | |
| | 143 | end = thruChars( r2, it, end ); |
| | 144 | if( end != it ) |
| | 145 | pos = end - str->ptr.b; |
| | 146 | rit += 2; |
| | 147 | } |
| | 148 | else |
| | 149 | { |
| | 150 | ur_throwErr( pe->thread, UR_EX_SCRIPT, |
| | 151 | "parse any expected bitset" ); |
| | 152 | pe->error = 1; |
| | 153 | return 0; |
| | 154 | } |
| | 155 | } |
| | 156 | break; |
| | 157 | |
| | 158 | case UR_ATOM_SOME: |
| | 159 | { |
| | 160 | const UCell* found; |
| | 161 | const UCell* r2end; |
| | 162 | const UCell* r2 = rit + 1; |
| | 163 | |
| | 164 | if( r2 == rend ) |
| | 165 | return 0; |
| | 166 | |
| | 167 | if( ur_is(r2, UT_WORD) ) |
| | 168 | { |
| | 169 | UBlock* ctxBlk; |
| | 170 | ur_wordCell( r2, ctxBlk, r2 ); |
| | 171 | } |
| | 172 | |
| | 173 | if( ur_is(r2, UT_BLOCK) ) |
| | 174 | { |
| | 175 | UBlock* blk2; |
| | 176 | |
| | 177 | blk2 = ur_block(r2); |
| | 178 | r2 = blk2->ptr.cells + r2->series.it; |
| | 179 | r2end = blk2->ptr.cells + blk2->used; |
| | 180 | |
| | 181 | found = _parseStr( pe, r2, r2end, &pos ); |
| | 182 | if( found ) |
| | 183 | { |
| | 184 | do |
| | 185 | { |
| | 186 | found = _parseStr( pe, r2, r2end, &pos ); |
| | 187 | if( pe->error ) |
| | 188 | return 0; |
| | 189 | } |
| | 190 | while( found ); |
| | 191 | rit += 2; |
| | 192 | } |
| | 193 | else |
| | 194 | { |
| | 195 | goto failed_eval; |
| | 196 | } |
| | 197 | } |
| | 198 | else if( ur_is(r2, UT_BITSET) ) |
| | 199 | { |
| | 200 | const uint8_t* it; |
| | 201 | const uint8_t* end; |
| | 202 | |
| | 203 | str = ur_binPtr( pe->input.n ); |
| | 204 | it = str->ptr.b + pos; |
| | 205 | end = str->ptr.b + str->used; |
| | 206 | |
| | 207 | end = thruChars( r2, it, end ); |
| | 208 | if( end == it ) |
| | 209 | { |
| | 210 | rit = nextRule( rit, rend ); |
| | 211 | if( ! rit ) |
| | 212 | return 0; |
| | 213 | } |
| | 214 | else |
| | 215 | { |
| | 216 | pos = end - str->ptr.b; |
| | 217 | rit += 2; |
| | 218 | } |
| | 219 | } |
| | 220 | else |
| | 221 | { |
| | 222 | ur_throwErr( pe->thread, UR_EX_SCRIPT, |
| | 223 | "parse some expected block or bitset" ); |
| | 224 | pe->error = 1; |
| | 225 | return 0; |
| | 226 | } |
| | 227 | } |
| | 228 | break; |
| | 229 | |
| | 230 | case UR_ATOM_BREAK: |
| | 231 | return rit; |
| | 232 | |
| | 233 | case UR_ATOM_BAR: |
| | 234 | goto complete; |
| | 235 | |
| | 236 | case UR_ATOM_TO: |
| | 237 | case UR_ATOM_THRU: |
| | 238 | { |
| | 239 | int si; |
| | 240 | const UCell* r2 = rit + 1; |
| | 241 | |
| | 242 | if( r2 == rend ) |
| | 243 | return 0; |
| | 244 | |
| | 245 | if( ! ur_is( r2, UT_STRING ) ) |
| | 246 | return 0; |
| | 247 | |
| | 248 | str = ur_bin(r2); |
| | 249 | si = ur_findString( ur_binPtr( pe->input.n ), pos, |
| | 250 | str, r2->series.it ); |
| | 251 | if( si > -1 ) |
| | 252 | { |
| | 253 | if( ur_atom(rit) == UR_ATOM_TO ) |
| | 254 | pos = si; |
| | 255 | else |
| | 256 | pos = si + str->used - r2->series.it; |
| | 257 | rit += 2; |
| | 258 | } |
| | 259 | else |
| | 260 | { |
| | 261 | goto failed; |
| | 262 | } |
| | 263 | } |
| | 264 | break; |
| | 265 | |
| | 266 | case UR_ATOM_SKIP: |
| | 267 | //if( pos >= str->used ) |
| | 268 | // return 0; |
| | 269 | ++rit; |
| | 270 | ++pos; |
| | 271 | break; |
| | 272 | |
| | 273 | /* |
| | 274 | case UR_ATOM_SET: |
| | 275 | case UR_ATOM_COPY: |
| | 276 | break; |
| | 277 | */ |
| | 278 | |
| | 279 | default: |
| | 280 | { |
| | 281 | const UCell* wval; |
| | 282 | const UBlock* cblk; |
| | 283 | |
| | 284 | ur_wordCell( rit, cblk, wval ); |
| | 285 | |
| | 286 | if( ur_is(wval, UT_CHAR) ) |
| | 287 | { |
| | 288 | str = ur_binPtr( pe->input.n ); |
| | 289 | if( str->ptr.c[ pos ] == ur_char(wval) ) |
| | 290 | { |
| | 291 | ++rit; |
| | 292 | ++pos; |
| | 293 | } |
| | 294 | else |
| | 295 | goto failed; |
| | 296 | } |
| | 297 | else if( ur_is(wval, UT_BLOCK) ) |
| | 298 | { |
| | 299 | // Same as case UT_BLOCK below. |
| | 300 | cblk = ur_block( wval ); |
| | 301 | wval = _parseStr( pe,cblk->ptr.cells + wval->series.it, |
| | 302 | cblk->ptr.cells + cblk->used, |
| | 303 | &pos ); |
| | 304 | if( wval ) |
| | 305 | ++rit; |
| | 306 | else |
| | 307 | goto failed_eval; |
| | 308 | } |
| | 309 | else if( ur_is(wval, UT_BITSET) ) |
| | 310 | { |
| | 311 | UString* bin = ur_bin( wval ); |
| | 312 | int c; |
| | 313 | |
| | 314 | str = ur_binPtr( pe->input.n ); |
| | 315 | c = str->ptr.c[ pos ]; |
| | 316 | if( ur_bitIsSet( bin->ptr.b, c ) ) |
| | 317 | { |
| | 318 | ++rit; |
| | 319 | ++pos; |
| | 320 | } |
| | 321 | else |
| | 322 | goto failed; |
| | 323 | } |
| | 324 | else |
| | 325 | { |
| | 326 | ur_throwErr( pe->thread, UR_EX_SCRIPT, |
| | 327 | "parse expected char, block or bitset" ); |
| | 328 | pe->error = 1; |
| | 329 | return 0; |
| | 330 | } |
| | 331 | } |
| | 332 | break; |
| | 333 | } |
| | 334 | break; |
| | 335 | |
| | 336 | case UT_SETWORD: |
| | 337 | { |
| | 338 | UCell* wval; |
| | 339 | UBlock* ctxBlk; |
| | 340 | |
| | 341 | ur_wordCell( rit, ctxBlk, wval ); |
| | 342 | |
| | 343 | ur_setType( wval, UT_STRING ); |
| | 344 | ur_setSeries( wval, pe->input.n, pos ); |
| | 345 | |
| | 346 | ++rit; |
| | 347 | } |
| | 348 | break; |
| | 349 | |
| | 350 | case UT_INT: |
| | 351 | { |
| | 352 | const UCell* r2 = rit + 1; |
| | 353 | |
| | 354 | if( r2 == rend ) |
| | 355 | return 0; |
| | 356 | |
| | 357 | if( ur_is(r2, UT_INT) ) |
| | 358 | { |
| | 359 | int count = 0; |
| | 360 | int maxCount = ur_int(r2); |
| | 361 | |
| | 362 | ++r2; |
| | 363 | if( r2 == rend ) |
| | 364 | return 0; |
| | 365 | |
| | 366 | while( 1 ) |
| | 367 | { |
| | 368 | if( ! _parseStr( pe, r2, r2 + 1, &pos ) ) |
| | 369 | break; |
| | 370 | ++count; |
| | 371 | } |
| | 372 | |
| | 373 | if( pe->error ) |
| | 374 | return 0; |
| | 375 | if( (count >= ur_int(rit)) && (count <= maxCount) ) |
| | 376 | rit += 3; |
| | 377 | else |
| | 378 | goto failed; |
| | 379 | } |
| | 380 | } |
| | 381 | break; |
| | 382 | |
| | 383 | case UT_CHAR: |
| | 384 | str = ur_binPtr( pe->input.n ); |
| | 385 | if( str->ptr.c[ pos ] == ur_char(rit) ) |
| | 386 | { |
| | 387 | ++rit; |
| | 388 | ++pos; |
| | 389 | } |
| | 390 | else |
| | 391 | goto failed; |
| | 392 | break; |
| | 393 | |
| | 394 | case UT_BLOCK: |
| | 395 | { |
| | 396 | const UCell* found; |
| | 397 | const UBlock* cblk; |
| | 398 | |
| | 399 | cblk = ur_block( rit ); |
| | 400 | found = _parseStr( pe, cblk->ptr.cells + rit->series.it, |
| | 401 | cblk->ptr.cells + cblk->used, &pos ); |
| | 402 | if( found ) |
| | 403 | ++rit; |
| | 404 | else |
| | 405 | goto failed_eval; |
| | 406 | } |
| | 407 | break; |
| | 408 | |
| | 409 | case UT_PAREN: |
| | 410 | { |
| | 411 | ur_eval( pe->thread, rit->series.n, rit->series.it ); |
| | 412 | if( _errorThrown ) |
| | 413 | { |
| | 414 | pe->error = 1; |
| | 415 | return 0; |
| | 416 | } |
| | 417 | ++rit; |
| | 418 | } |
| | 419 | break; |
| | 420 | |
| | 421 | case UT_STRING: |
| | 422 | { |
| | 423 | UString* mstr = ur_bin(rit); |
| | 424 | |
| | 425 | str = ur_binPtr( pe->input.n ); |
| | 426 | /* |
| | 427 | if( pe->matchCase ) |
| | 428 | pos = orMatchStringCase(str, pos, mstr, rit->series.it); |
| | 429 | else |
| | 430 | */ |
| | 431 | pos = ur_matchString( str, pos, mstr, rit->series.it ); |
| | 432 | |
| | 433 | if( pos ) |
| | 434 | ++rit; |
| | 435 | else |
| | 436 | goto failed; |
| | 437 | } |
| | 438 | break; |
| | 439 | |
| | 440 | default: |
| | 441 | ur_throwErr( pe->thread, UR_EX_SCRIPT, |
| | 442 | "invalid parse value" ); |
| | 443 | //orDatatypeName( ur_type(rit) ) ); |
| | 444 | pe->error = 1; |
| | 445 | return 0; |
| | 446 | } |
| | 447 | } |
| | 448 | |
| | 449 | complete: |
| | 450 | |
| | 451 | *spos = pos; |
| | 452 | return rit; |
| | 453 | |
| | 454 | failed_eval: |
| | 455 | |
| | 456 | if( pe->error ) |
| | 457 | return 0; |
| | 458 | |
| | 459 | failed: |
| | 460 | |
| | 461 | rit = nextRule( rit, rend ); |
| | 462 | if( rit ) |
| | 463 | { |
| | 464 | pos = *spos; |
| | 465 | goto match; |
| | 466 | } |
| | 467 | return 0; |
| | 468 | } |
| | 469 | |
| | 470 | |
| | 471 | #if 0 |
| 184 | | if( ur_is(rit, UT_WORD) ) |
| 185 | | { |
| | 525 | cp = "\t\n\r ,;"; |
| | 526 | while( *cp ) |
| | 527 | { |
| | 528 | orSetBit( delim, *cp ); |
| | 529 | ++cp; |
| | 530 | } |
| | 531 | } |
| | 532 | orRefPush( UT_STRING, orStringN( custom ) ); |
| | 533 | } |
| | 534 | else |
| | 535 | { |
| | 536 | custom = 0; |
| | 537 | delim = (uint8_t*) memAlloc(32); |
| | 538 | memCpy( delim, charset_white, 32 ); |
| | 539 | orSetBit( delim, ',' ); |
| | 540 | orSetBit( delim, ';' ); |
| | 541 | } |
| | 542 | |
| | 543 | |
| | 544 | assert( ser->series.it <= str->used ); |
| | 545 | |
| | 546 | cp = str->ptr.c + ser->series.it; |
| | 547 | end = str->ptr.c + str->used; |
| | 548 | sstart = cp; |
| | 549 | |
| | 550 | while( cp != end ) |
| | 551 | { |
| | 552 | int c = *cp; |
| | 553 | if( ur_bitIsSet(delim, c) ) |
| | 554 | { |
| | 555 | if( cp > sstart ) |
| | 556 | { |
| | 557 | orAppendString( rblk, orMakeCString(sstart, cp-sstart) ); |
| | 558 | } |
| | 559 | sstart = cp + 1; |
| | 560 | } |
| | 561 | ++cp; |
| | 562 | } |
| | 563 | |
| | 564 | if( cp > sstart ) |
| | 565 | { |
| | 566 | orAppendString( rblk, orMakeCString(sstart, cp-sstart) ); |
| | 567 | } |
| | 568 | |
| | 569 | if( custom ) |
| | 570 | { |
| | 571 | orRefPop( 1 ); |
| | 572 | orArrayFree( custom ); // Garbage collection will free this later. |
| | 573 | } |
| | 574 | else |
| | 575 | { |
| | 576 | memFree( delim ); |
| | 577 | } |
| | 578 | |
| | 579 | orRefPop( 2 ); |
| | 580 | orResultBLOCK( rblkN ); |
| | 581 | } |
| | 582 | #endif |
| | 583 | |
| | 584 | |
| | 585 | /*==========================================================================*/ |
| | 586 | |
| | 587 | |
| | 588 | #if 0 |
| | 589 | /** |
| | 590 | Returns index in blkV where fval is found or -1 if fval is not found. |
| | 591 | */ |
| | 592 | static int _findBlock( const UBlock* blk, UIndex pos, const UCell* fval ) |
| | 593 | { |
| | 594 | UCell* it = blk->ptr.cells + pos; |
| | 595 | UCell* end = blk->ptr.cells + blk->used; |
| | 596 | |
| | 597 | // TODO: If fval is block then all values must match. |
| | 598 | |
| | 599 | assert( pos <= blk->used ); |
| | 600 | |
| | 601 | while( it != end ) |
| | 602 | { |
| | 603 | if( orEqual(it, fval) ) |
| | 604 | return it - blk->ptr.cells; |
| | 605 | ++it; |
| | 606 | } |
| | 607 | return -1; |
| | 608 | } |
| | 609 | |
| | 610 | |
| | 611 | /* |
|