Changeset 159
- Timestamp:
- 05/26/06 16:45:58 (3 years ago)
- Location:
- trunk/thune
- Files:
-
- 2 added
- 3 modified
-
encoding.c (modified) (3 diffs)
-
print.c (modified) (7 diffs)
-
print_string.c (added)
-
series.c (modified) (5 diffs)
-
tests/working/encode.t (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/thune/encoding.c
r158 r159 54 54 Returns number of characters copied. 55 55 */ 56 static int copyAsciiToUtf16( uint16_t* dest, uint8_t* src, int len )56 int copyUtf16ToAscii( char* dest, const uint16_t* src, int len ) 57 57 { 58 uint8_t* end = src + len; 58 const uint16_t* end; 59 uint16_t c; 60 61 end = src + len; 62 63 while( src != end ) 64 { 65 c = *src++; 66 if( c > 127 ) 67 c = 0; 68 *dest++ = c; 69 } 70 return len; 71 } 72 73 74 /* 75 Returns number of characters copied. 76 */ 77 int copyAsciiToUtf16( uint16_t* dest, const char* src, int len ) 78 { 79 const char* end = src + len; 59 80 while( src != end ) 60 81 *dest++ = *src++; … … 68 89 UIndex strN; 69 90 UCell* res; 70 UChar* cpA;71 UChar* cpB;91 char* cpA; 92 char* cpB; 72 93 UBinary* bin; 73 int len; 94 int count; 95 int enc; 74 96 75 97 UR_S_DROP; … … 78 100 if( ur_isAWord(tos) && ur_stringSlice(res, &cpA, &cpB) ) 79 101 { 80 len= cpB - cpA;102 count = cpB - cpA; 81 103 82 104 switch( ur_atom(tos) ) 83 105 { 84 106 case UR_ATOM_ASCII: 85 /* 86 ur_setType(res, UT_STRING); 87 ur_setEncoding( res, UR_ENC_ASCII ); 88 ur_setSeries(res, strN, 0); 89 */ 107 switch( ur_encoding(res) ) 108 { 109 case UR_ENC_UTF16: 110 count >>= 1; 111 strN = ur_makeBinary( count ); 112 bin = ur_binPtr( strN ); 113 bin->used = copyUtf16ToAscii( bin->ptr.c, 114 (uint16_t*) cpA, count ); 115 enc = UR_ENC_ASCII; 116 goto set_result; 117 } 90 118 break; 91 119 92 120 case UR_ATOM_UTF8: 93 121 /* 94 ur_setType(res, UT_STRING); 95 ur_setEncoding( res, UR_ENC_UTF8 ); 96 ur_setSeries(res, strN, 0); 122 enc = UR_ENC_UTF8; 123 switch( ur_encoding(res) ) 124 { 125 case UR_ENC_ASCII: 126 goto set_result; 127 128 case UR_ENC_UTF16: 129 goto set_result; 130 } 97 131 */ 98 132 break; 99 133 100 134 case UR_ATOM_UTF16: 101 strN = ur_makeBinary( len * 2 );102 bin = ur_binPtr( strN );103 104 135 switch( ur_encoding(res) ) 105 136 { 106 137 case UR_ENC_ASCII: 138 strN = ur_makeBinary( count * 2 ); 139 bin = ur_binPtr( strN ); 107 140 bin->used = copyAsciiToUtf16( bin->ptr.u16, 108 (uint8_t*) cpA, len ); 109 break; 141 cpA, count ); 142 enc = UR_ENC_UTF16; 143 goto set_result; 110 144 } 111 112 ur_setType(res, UT_STRING);113 ur_setEncoding( res, UR_ENC_UTF16 );114 ur_setSeries(res, strN, 0);115 145 break; 116 146 } 117 147 } 148 return; 149 150 set_result: 151 152 ur_setType(res, UT_STRING); 153 ur_setEncoding(res, enc); 154 ur_setSeries(res, strN, 0); 118 155 } 119 156 -
trunk/thune/print.c
r157 r159 29 29 30 30 31 extern int copyUtf16ToAscii( char* dest, const uint16_t* src, int len ); 32 33 31 34 static char charStr[] = "'?'"; 32 35 … … 38 41 memCpy( str->ptr.c + str->used, cp, len ); 39 42 str->used += len; 43 } 44 45 46 void ur_strCatUtf16( UString* str, const uint16_t* cp, int len ) 47 { 48 if( (str->used + len) > str->avail ) 49 EXPAND( str, len ); 50 str->used += copyUtf16ToAscii( str->ptr.c + str->used, cp, len ); 40 51 } 41 52 … … 87 98 88 99 89 /*90 Returns number of characters copied.91 */92 static int copyUtf16ToAscii( uint8_t* dest, uint16_t* src, int len )93 {94 uint16_t* end;95 uint16_t c;96 97 end = src + len;98 99 while( src != end )100 {101 c = *src++;102 if( c > 127 )103 c = 0;104 *dest++ = c;105 }106 return len;107 }108 109 110 100 #ifdef OR_CONFIG_HEX_TOKEN 111 101 static void appendHex( UString* out, int32_t n ) … … 172 162 173 163 174 static void _appendString( UString* out, const UChar* it, const UChar* end ) 175 { 176 int used = end - it; 177 if( used > 0 ) 178 { 179 int newlines = 0; 180 int quote = '{'; 181 182 if( used < 51 ) 183 { 184 // If the string is short and has less than 3 newlines then 185 // use normal quotes. 186 187 const UChar* ci = it; 188 while( ci != end ) 189 { 190 if( *ci == '"' ) 191 break; 192 if( *ci == '\n' ) 193 { 194 if( newlines == 3 ) 195 break; 196 ++newlines; 197 } 198 ++ci; 199 } 200 if( ci == end ) 201 quote = '"'; 202 } 203 204 append1( quote, out ); 205 { 206 const UChar* put; 207 const UChar* cp = it; 208 209 #define PUT if(cp != put) {append(out,put,cp - put);} put = cp + 1; 210 211 put = cp; 212 while( cp != end ) 213 { 214 if( *cp == '^' ) 215 { 216 PUT 217 append( out, "^^", 2 ); 218 } 219 else if( *cp == '}' ) 220 { 221 PUT 222 if( quote == '"' ) 223 append1( '}', out ); 224 else 225 append( out, "^}", 2 ); 226 } 227 else if( *cp == '\n' ) 228 { 229 PUT 230 if( quote == '"' ) 231 append( out, "^/", 2 ); 232 else 233 append1( '\n', out ); 234 } 235 else if( *cp == '\t' ) 236 { 237 PUT 238 append( out, "^-", 2 ); 239 } 240 else if( *cp == '\0' ) 241 { 242 PUT 243 append( out, "^@", 2 ); 244 } 245 ++cp; 246 } 247 if( cp != put ) 248 append( out, put, cp - put ); 249 } 250 251 if( quote == '{' ) 252 quote = '}'; 253 append1( quote, out ); 254 } 255 else 256 { 257 append( out, "\"\"", 2 ); 258 } 259 } 164 #define APP_STR_FUNC _appendStringAscii 165 #define APP_STR_T char 166 #define APP_STR_COPY ur_strCat 167 #include "print_string.c" 168 169 #define APP_STR_FUNC _appendStringUtf16 170 #define APP_STR_T uint16_t 171 #define APP_STR_COPY ur_strCatUtf16 172 #include "print_string.c" 260 173 261 174 … … 711 624 case UR_ENC_UTF16: 712 625 EXPAND( out, used ); 713 out->used += copyUtf16ToAscii( (uint8_t*)out->ptr.c,626 out->used += copyUtf16ToAscii( out->ptr.c, 714 627 str->ptr.u16, used ); 715 628 break; … … 892 805 { 893 806 UString* str = ur_bin( val ); 894 _appendString( out, str->ptr.c + val->series.it, 895 str->ptr.c + str->used ); 807 if( ur_encoding(val) == UR_ENC_UTF16 ) 808 { 809 _appendStringUtf16( out, str->ptr.u16 + val->series.it, 810 str->ptr.u16 + str->used ); 811 } 812 else 813 { 814 _appendStringAscii( out, str->ptr.c + val->series.it, 815 str->ptr.c + str->used ); 816 } 896 817 } 897 818 break; … … 1138 1059 { 1139 1060 UString* str = ur_bin(val); 1140 _appendString( out, str->ptr.c + val->slice.it, 1141 str->ptr.c + ur_sliceEnd(val, str) ); 1061 if( ur_encoding(val) == UR_ENC_UTF16 ) 1062 { 1063 _appendStringUtf16( out, str->ptr.u16 + val->slice.it, 1064 str->ptr.u16 + ur_sliceEnd(val,str) ); 1065 } 1066 else 1067 { 1068 _appendStringAscii( out, str->ptr.c + val->slice.it, 1069 str->ptr.c + ur_sliceEnd(val,str) ); 1070 } 1142 1071 } 1143 1072 break; -
trunk/thune/series.c
r152 r159 37 37 38 38 39 /* 40 Returns: 41 Pointer to UString if cell is a string! or string slice!. 42 */ 39 43 UString* ur_stringSlice( const UCell* cell, UChar** cpA, UChar** cpB ) 40 44 { 41 45 UString* str; 46 int end; 42 47 43 48 if( ur_is(cell, UT_STRING) ) 44 49 { 45 50 str = ur_bin(cell); 46 if( str->used ) 51 if( ! str->used ) 52 goto empty; 53 end = str->used; 54 55 set_pointers: 56 57 if( ur_encoding(cell) == UR_ENC_UTF16 ) 58 { 59 *cpA = (char*) (str->ptr.u16 + cell->series.it); 60 *cpB = (char*) (str->ptr.u16 + end); 61 } 62 else 47 63 { 48 64 *cpA = str->ptr.c + cell->series.it; 49 *cpB = str->ptr.c + str->used; 50 return str; 51 } 52 else 53 { 65 *cpB = str->ptr.c + end; 66 } 67 return str; 68 } 69 else if( ur_is(cell, UT_SLICE) && (ur_sliceDT(cell) == UT_STRING) ) 70 { 71 str = ur_bin(cell); 72 if( ! str->used ) 54 73 goto empty; 55 } 56 } 57 else if( ur_is(cell, UT_SLICE) && (ur_sliceDT(cell) == UT_STRING) ) 58 { 59 str = ur_bin(cell); 60 if( str->used ) 61 { 62 *cpA = str->ptr.c + cell->slice.it; 63 *cpB = str->ptr.c + ur_sliceEnd(cell, str); 64 return str; 65 } 66 else 67 { 68 goto empty; 69 } 74 end = ur_sliceEnd(cell, str); 75 goto set_pointers; 70 76 } 71 77 return 0; … … 929 935 { 930 936 case UT_BINARY: 931 case UT_STRING:932 937 { 933 938 UString* arr = ur_bin(tos); 934 939 if( tos->series.it < arr->used ) 935 940 ur_arrayErase( arr, sizeof(char), tos->series.it, len ); 941 } 942 break; 943 944 case UT_STRING: 945 { 946 UString* arr = ur_bin(tos); 947 if( tos->series.it < arr->used ) 948 { 949 if( ur_encoding(tos) == UR_ENC_UTF16 ) 950 ur_arrayErase( arr, sizeof(uint16_t), tos->series.it, len ); 951 else 952 ur_arrayErase( arr, sizeof(char), tos->series.it, len ); 953 } 936 954 } 937 955 break; … … 1047 1065 if( (n > -1) && (n < str->used) ) 1048 1066 { 1067 if( ur_encoding(ser) == UR_ENC_UTF16 ) 1068 ur_int(result) = str->ptr.u16[ n ]; 1069 else 1070 ur_int(result) = str->ptr.c[ n ]; 1071 1072 // Must set type after we check encoding. 1049 1073 ur_setType( result, UT_CHAR ); 1050 ur_int(result) = str->ptr.c[ n ];1051 1074 return 1; 1052 1075 } … … 1540 1563 { 1541 1564 case UT_BINARY: 1565 if( ur_is(val, UT_CHAR) || ur_is(val, UT_INT) ) 1566 { 1567 UString* str = ur_bin(ser); 1568 if( n < str->used ) 1569 { 1570 str->ptr.c[ n ] = ur_int(val); 1571 UR_S_DROP; 1572 UR_S_NIP; 1573 return; 1574 } 1575 } 1576 goto bad_index; 1577 1542 1578 case UT_STRING: 1543 1579 if( ur_is(val, UT_CHAR) || ur_is(val, UT_INT) ) … … 1546 1582 if( n < str->used ) 1547 1583 { 1548 str->ptr.c[ n ] = ur_int(val); 1584 if( ur_encoding(ser) == UR_ENC_UTF16 ) 1585 str->ptr.u16[ n ] = ur_int(val); 1586 else 1587 str->ptr.c[ n ] = ur_int(val); 1549 1588 UR_S_DROP; 1550 1589 UR_S_NIP;
