Changeset 156 for trunk/orca
- Timestamp:
- 05/21/06 18:16:04 (3 years ago)
- Location:
- trunk/orca
- Files:
-
- 2 modified
Legend:
- Unmodified
- Added
- Removed
-
trunk/orca/ChangeLog
r71 r156 2 2 3 3 4 V0.0.24 - ?? March 2006 5 4 V0.0.24 - 21 May 2006 5 6 * Improved parse. 6 7 * Added make-dir. 7 8 * Bugfixes. -
trunk/orca/parse.c
r154 r156 21 21 #include "os.h" 22 22 #include "ovalue.h" 23 #include "charset.h"24 23 #include "orca_atoms.h" 24 #include "internal.h" 25 26 27 #define PARSE_EX 28 #define orChar orInt 29 #define UChar char 30 #define OR_ITER_BLOCK(ita,itb,blk,scell) \ 31 ita = blk->values + scell->series.it; \ 32 itb = blk->values + blk->used; 33 25 34 26 35 … … 29 38 extern int orFindString( const OString* strA, int iA, 30 39 const OString* strB, int iB ); 40 41 42 typedef struct 43 { 44 uint8_t type; 45 uint8_t flags; 46 uint8_t datatype; 47 uint8_t _pad; 48 OIndex n; /* Series number */ 49 OIndex it; /* Element Iterator */ 50 OIndex end; /* Slice end */ 51 } 52 UCellSlice; 53 54 55 typedef struct 56 { 57 //UThread* thread; 58 OValue* rules; 59 UCellSlice input; 60 OString* str; 61 int error; 62 int matchCase; 63 } 64 StringParser; 65 66 67 typedef struct 68 { 69 //UThread* thread; 70 OValue* rules; 71 UCellSlice input; 72 OString* blk; 73 int error; 74 } 75 BlockParser; 31 76 32 77 … … 48 93 49 94 50 static const uint8_t* thruChars( const OValue* bitsetVal, 51 const uint8_t* it, 52 const uint8_t* end ) 95 static int _repeatChar( OString* input, OIndex pos, int limit, int c ) 53 96 { 97 UChar* start; 98 UChar* it; 99 UChar* end; 100 101 it = input->charArray; 102 end = it + input->used; 103 it += pos; 104 if( end > (it + limit) ) 105 end = it + limit; 106 107 start = it; 108 while( it != end ) 109 { 110 if( *it != c ) 111 break; 112 ++it; 113 } 114 return it - start; 115 } 116 117 118 static int _repeatBitset( OString* input, OIndex pos, int limit, 119 const OValue* patc ) 120 { 121 UChar* start; 122 UChar* it; 123 UChar* end; 54 124 int c; 55 OBinary* bin = orSTRING( bitsetVal);125 OBinary* bin = orSTRING(patc); 56 126 const uint8_t* bits = bin->byteArray; 57 127 int maxC = bin->used * 8; 58 128 129 it = input->charArray; 130 end = it + input->used; 131 it += pos; 132 if( end > (it + limit) ) 133 end = it + limit; 134 135 start = it; 59 136 while( it != end ) 60 137 { … … 66 143 ++it; 67 144 } 68 return it ;145 return it - start; 69 146 } 70 147 … … 73 150 Returns zero if matching rule not found. 74 151 */ 75 static const OValue* evalParseStr( const OValue* rit, const OValue* rend,76 OIndex strN, int* spos )152 static const OValue* _parseStr( StringParser* pe, const OValue* rit, 153 const OValue* rend, OIndex* spos ) 77 154 { 78 OString* str; 79 int atom; 80 int pos = *spos; 155 const OBlock* cblk; 156 const OValue* tval; 157 int32_t repMin; 158 int32_t repMax; 159 OString* istr; 160 OIndex pos = *spos; 161 162 istr = pe->str; 81 163 82 164 match: … … 87 169 { 88 170 case OT_WORD: 89 atom = orAtom(rit); 90 91 if( atom == OR_ATOM_OPT ) 92 { 93 const OValue* r2 = rit + 1; 94 95 if( r2 == rend ) 171 switch( orAtom(rit) ) 172 { 173 case OR_ATOM_OPT: 174 ++rit; 175 repMin = 0; 176 repMax = 1; 177 goto repeat; 178 179 case OR_ATOM_ANY: 180 ++rit; 181 repMin = 0; 182 repMax = 0x7fffffff; 183 goto repeat; 184 185 case OR_ATOM_SOME: 186 ++rit; 187 repMin = 1; 188 repMax = 0x7fffffff; 189 goto repeat; 190 191 case OR_ATOM_BREAK: 192 return rit; 193 194 case OR_ATOM_BAR: 195 goto complete; 196 197 case OR_ATOM_TO: 198 case OR_ATOM_THRU: 199 { 200 OAtom ratom = orAtom(rit); 201 202 ++rit; 203 if( rit == rend ) 96 204 return 0; 97 205 98 evalParseStr( r2, r2 + 1, strN, &pos ); 99 if( orErrorThrown ) 100 return 0; 101 rit += 2; 102 } 103 else if( atom == OR_ATOM_ANY ) 104 { 105 //until [not eval-pstr second it] 106 //it: skip it 2 107 108 const OValue* r2 = rit + 1; 109 110 if( r2 == rend ) 111 return 0; 112 113 if( orIs(r2, OT_WORD) ) 206 if( orIs(rit, OT_WORD) ) 114 207 { 115 OBlock* ctxBlk; 116 orWordVal( r2, ctxBlk, r2 ); 117 } 118 119 if( orIs(r2, OT_BITSET) ) 120 { 121 const uint8_t* it; 122 const uint8_t* end; 123 124 str = orStringPtr(strN); 125 it = str->byteArray + pos; 126 end = str->byteArray + str->used; 127 128 end = thruChars( r2, it, end ); 129 if( end != it ) 130 pos = end - str->byteArray; 131 rit += 2; 208 orWordVal( rit, cblk, tval ); 132 209 } 133 210 else 134 211 { 135 orError( "parse any expected bitset" ); 212 tval = rit; 213 } 214 215 switch( orType(tval) ) 216 { 217 case OT_CHAR: 218 { 219 int c = orChar(tval); 220 UChar* cp = istr->charArray + pos; 221 UChar* end = istr->charArray + pe->input.end; 222 while( cp != end ) 223 { 224 if( *cp == c ) 225 break; 226 ++cp; 227 } 228 if( cp == end ) 229 goto failed; 230 pos = cp - istr->charArray; 231 if( ratom == OR_ATOM_THRU ) 232 ++pos; 233 } 234 break; 235 236 case OT_STRING: 237 { 238 OString* pat = orSTRING(tval); 239 pos = orFindString( istr, pos, 240 pat, tval->series.it ); 241 if( pos < 0 ) 242 goto failed; 243 if( ratom == OR_ATOM_THRU ) 244 pos += pat->used - tval->series.it; 245 } 246 break; 247 248 case OT_BITSET: 249 { 250 OBinary* bin = orSTRING(tval); 251 const uint8_t* bits = bin->byteArray; 252 int maxC = bin->used * 8; 253 UChar* cp = istr->charArray + pos; 254 UChar* end = istr->charArray + pe->input.end; 255 int c; 256 while( cp != end ) 257 { 258 c = *cp; 259 if( c < maxC ) 260 { 261 if( orBitIsSet( bits, c ) ) 262 break; 263 } 264 ++cp; 265 } 266 if( cp == end ) 267 goto failed; 268 pos = cp - istr->charArray; 269 if( ratom == OR_ATOM_THRU ) 270 ++pos; 271 } 272 break; 273 274 case OT_BLOCK: 275 // TODO 276 orError( PARSE_EX 277 "to/thru block! not implemented" ); 278 pe->error = 1; 279 return 0; 280 } 281 ++rit; 282 } 283 break; 284 285 case OR_ATOM_SKIP: 286 // TODO - int! skip 287 //if( pos >= istr->used ) 288 // return 0; 289 ++rit; 290 ++pos; 291 break; 292 293 //case OR_ATOM_COPY: 294 295 default: 296 orWordVal( rit, cblk, tval ); 297 298 if( orIs(tval, OT_CHAR) ) 299 goto match_char; 300 else if( orIs(tval, OT_STRING) ) 301 goto match_string; 302 else if( orIs(tval, OT_BLOCK) ) 303 goto match_block; 304 else if( orIs(tval, OT_BITSET) ) 305 goto match_bitset; 306 else 307 { 308 orError( PARSE_EX 309 "parse expected char!/block!/bitset!" ); 310 pe->error = 1; 136 311 return 0; 137 312 } 138 } 139 else if( atom == OR_ATOM_SOME ) 140 { 141 const OValue* found; 142 const OValue* r2end; 143 const OValue* r2 = rit + 1; 144 145 if( r2 == rend ) 146 return 0; 147 148 if( orIs(r2, OT_WORD) ) 149 { 150 OBlock* ctxBlk; 151 orWordVal( r2, ctxBlk, r2 ); 152 } 153 154 if( orIs(r2, OT_BLOCK) ) 155 { 156 OBlock* blk2; 157 158 blk2 = orBLOCK(r2); 159 r2 = blk2->values + r2->series.it; 160 r2end = blk2->values + blk2->used; 161 162 found = evalParseStr( r2, r2end, strN, &pos ); 163 if( found ) 164 { 165 do 166 { 167 found = evalParseStr( r2, r2end, strN, &pos ); 168 if( orErrorThrown ) 169 return 0; 170 } 171 while( found ); 172 rit += 2; 173 } 174 else 175 { 176 goto failed_eval; 177 } 178 } 179 else if( orIs(r2, OT_BITSET) ) 180 { 181 const uint8_t* it; 182 const uint8_t* end; 183 184 str = orStringPtr(strN); 185 it = str->byteArray + pos; 186 end = str->byteArray + str->used; 187 188 end = thruChars( r2, it, end ); 189 if( end == it ) 190 { 191 rit = nextRule( rit, rend ); 192 if( ! rit ) 193 return 0; 194 } 195 else 196 { 197 pos = end - str->byteArray; 198 rit += 2; 199 } 200 } 201 else 202 { 203 orError( "parse some expected block or bitset" ); 204 return 0; 205 } 206 } 207 else if( atom == OR_ATOM_BREAK ) 208 { 209 return rit; 210 } 211 else if( atom == OR_ATOM_BAR ) 212 { 213 goto complete; 214 } 215 else if( atom == OR_ATOM_TO ) 216 { 217 int si; 218 const OValue* r2 = rit + 1; 219 220 if( r2 == rend ) 221 return 0; 222 223 if( ! orIsString( orType(r2) ) ) 224 return 0; 225 226 si = orFindString( orStringPtr(strN), pos, 227 orSTRING(r2), r2->series.it ); 228 if( si > -1 ) 229 { 230 pos = si; 231 rit += 2; 232 } 233 else 234 { 235 goto failed; 236 } 237 } 238 else if( atom == OR_ATOM_SKIP ) 239 { 240 //if( pos >= str->used ) 241 // return 0; 242 ++rit; 243 ++pos; 244 } 245 /* 246 else if( atom == OR_ATOM_THRU ) 247 { 248 } 249 else if( atom == OR_ATOM_SET ) 250 { 251 } 252 else if( atom == OR_ATOM_COPY ) 253 { 254 } 255 */ 256 else 257 { 258 const OValue* wval; 259 const OBlock* cblk; 260 261 orWordVal( rit, cblk, wval ); 262 263 if( orIs(wval, OT_BLOCK) ) 264 { 265 // Same as case OT_BLOCK below. 266 cblk = orBLOCK( wval ); 267 wval = evalParseStr( cblk->values + wval->series.it, 268 cblk->values + cblk->used, 269 strN, &pos ); 270 if( wval ) 271 ++rit; 272 else 273 goto failed_eval; 274 } 275 else if( orIs(wval, OT_BITSET) ) 276 { 277 OString* bin = orSTRING( wval ); 278 int c; 279 280 str = orStringPtr(strN); 281 c = str->charArray[ pos ]; 282 if( orBitIsSet( bin->byteArray, c ) ) 283 { 284 ++rit; 285 ++pos; 286 } 287 else 288 goto failed; 289 } 290 else 291 { 292 orError( "parse expected block or bitset" ); 293 return 0; 294 } 313 break; 295 314 } 296 315 break; … … 298 317 case OT_SETWORD: 299 318 { 300 OValue* wval; 301 OBlock* ctxBlk; 302 303 orWordVal( rit, ctxBlk, wval ); 304 305 orSetTF( wval, OT_STRING ); 306 orSetSeries( wval, strN, pos ); 307 319 OValue* cell; 320 orWordVal( rit, cblk, cell ); 308 321 ++rit; 322 323 orSetTF( cell, OT_STRING ); 324 orSetSeries( cell, pe->input.n, pos ); 309 325 } 310 326 break; 311 327 #if 0 328 case OT_GETWORD: 329 { 330 OValue* cell; 331 orWordVal( rit, cblk, cell ); 332 ++rit; 333 334 if( orIs(cell, OT_SLICE) && (cell->series.n == pe->input.n) ) 335 cell->slice.end = pos; 336 } 337 break; 338 #endif 312 339 case OT_INTEGER: 313 {314 const OValue* r2 = rit + 1; 315 316 if( r 2== rend )340 repMin = orInt(rit); 341 342 ++rit; 343 if( rit == rend ) 317 344 return 0; 318 345 319 if( orIs(r2, OT_INTEGER) ) 320 { 321 int count = 0; 322 int maxCount = orInt(r2); 323 324 ++r2; 325 if( r2 == rend ) 326 return 0; 327 328 while( 1 ) 329 { 330 if( ! evalParseStr( r2, r2 + 1, strN, &pos ) ) 331 break; 332 ++count; 333 } 334 335 if( orErrorThrown ) 336 return 0; 337 if( (count >= orInt(rit)) && (count <= maxCount) ) 338 rit += 3; 339 else 340 goto failed; 341 } 342 } 343 break; 346 if( orIs(rit, OT_INTEGER) ) 347 { 348 repMax = orInt(rit); 349 ++rit; 350 } 351 else 352 { 353 repMax = repMin; 354 } 355 goto repeat; 344 356 345 357 case OT_CHAR: 346 str = orStringPtr(strN); 347 if( str->charArray[ pos ] == orInt(rit) ) 358 tval = rit; 359 match_char: 360 if( istr->charArray[ pos ] == orChar(tval) ) 348 361 { 349 362 ++rit; … … 355 368 356 369 case OT_BLOCK: 370 tval = rit; 371 match_block: 372 cblk = orBLOCK( tval ); 373 tval = _parseStr( pe, cblk->values + tval->series.it, 374 cblk->values + cblk->used, &pos ); 375 istr = pe->str; 376 if( ! tval ) 377 goto failed_eval; 378 ++rit; 379 break; 380 381 case OT_PAREN: 382 orEvalBlock( orBlockPtr(rit->series.n), rit->series.it ); 383 if( orErrorThrown ) 384 { 385 pe->error = 1; 386 return 0; 387 } 388 389 /* Re-aquire pointer & check if input modified. */ 390 istr = pe->str = orStringPtr( pe->input.n ); 391 if( istr->used < pe->input.end ) 392 pe->input.end = istr->used; 393 394 ++rit; 395 break; 396 397 case OT_STRING: 398 tval = rit; 399 match_string: 357 400 { 358 const OValue* found; 359 const OBlock* cblk; 360 361 cblk = orBLOCK( rit ); 362 found = evalParseStr( cblk->values + rit->series.it, 363 cblk->values + cblk->used, 364 strN, &pos ); 365 if( found ) 366 ++rit; 367 else 368 goto failed_eval; 369 } 370 break; 371 372 case OT_PAREN: 373 { 374 orEvalBlock( orBLOCK(rit), rit->series.it ); 375 if( orErrorThrown ) 376 return 0; 377 ++rit; 378 } 379 break; 380 381 case OT_STRING: 382 { 383 OString* mstr = orSTRING(rit); 384 385 str = orStringPtr(strN); 401 OString* pat = orSTRING( tval ); 402 386 403
