diff options
author | dan miller | 2007-10-20 05:34:26 +0000 |
---|---|---|
committer | dan miller | 2007-10-20 05:34:26 +0000 |
commit | 354ea97baf765759911f0c56d3ed511350ebe348 (patch) | |
tree | 1adf96a98045d24b8741ba02bf21d195e70993ca /libraries/sqlite/win32/utf.c | |
parent | sqlite source (unix build) added to libraries (diff) | |
download | opensim-SC_OLD-354ea97baf765759911f0c56d3ed511350ebe348.zip opensim-SC_OLD-354ea97baf765759911f0c56d3ed511350ebe348.tar.gz opensim-SC_OLD-354ea97baf765759911f0c56d3ed511350ebe348.tar.bz2 opensim-SC_OLD-354ea97baf765759911f0c56d3ed511350ebe348.tar.xz |
sqlite 3.5.1 windows source
Diffstat (limited to '')
-rwxr-xr-x | libraries/sqlite/win32/utf.c | 545 |
1 files changed, 545 insertions, 0 deletions
diff --git a/libraries/sqlite/win32/utf.c b/libraries/sqlite/win32/utf.c new file mode 100755 index 0000000..b6b3133 --- /dev/null +++ b/libraries/sqlite/win32/utf.c | |||
@@ -0,0 +1,545 @@ | |||
1 | /* | ||
2 | ** 2004 April 13 | ||
3 | ** | ||
4 | ** The author disclaims copyright to this source code. In place of | ||
5 | ** a legal notice, here is a blessing: | ||
6 | ** | ||
7 | ** May you do good and not evil. | ||
8 | ** May you find forgiveness for yourself and forgive others. | ||
9 | ** May you share freely, never taking more than you give. | ||
10 | ** | ||
11 | ************************************************************************* | ||
12 | ** This file contains routines used to translate between UTF-8, | ||
13 | ** UTF-16, UTF-16BE, and UTF-16LE. | ||
14 | ** | ||
15 | ** $Id: utf.c,v 1.59 2007/10/03 08:46:45 danielk1977 Exp $ | ||
16 | ** | ||
17 | ** Notes on UTF-8: | ||
18 | ** | ||
19 | ** Byte-0 Byte-1 Byte-2 Byte-3 Value | ||
20 | ** 0xxxxxxx 00000000 00000000 0xxxxxxx | ||
21 | ** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx | ||
22 | ** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx | ||
23 | ** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx | ||
24 | ** | ||
25 | ** | ||
26 | ** Notes on UTF-16: (with wwww+1==uuuuu) | ||
27 | ** | ||
28 | ** Word-0 Word-1 Value | ||
29 | ** 110110ww wwzzzzyy 110111yy yyxxxxxx 000uuuuu zzzzyyyy yyxxxxxx | ||
30 | ** zzzzyyyy yyxxxxxx 00000000 zzzzyyyy yyxxxxxx | ||
31 | ** | ||
32 | ** | ||
33 | ** BOM or Byte Order Mark: | ||
34 | ** 0xff 0xfe little-endian utf-16 follows | ||
35 | ** 0xfe 0xff big-endian utf-16 follows | ||
36 | ** | ||
37 | */ | ||
38 | #include "sqliteInt.h" | ||
39 | #include <assert.h> | ||
40 | #include "vdbeInt.h" | ||
41 | |||
/*
** Constant with the value 1.  The SQLITE_BIGENDIAN and SQLITE_LITTLEENDIAN
** macros make use of it.
*/
const int sqlite3one = 1;

/*
** Payload bits carried by the lead byte of a multi-byte UTF-8 character.
** The table is indexed by (lead byte - 0xc0); each entry is the lead byte
** with its length-marker bits stripped off.
*/
static const unsigned char sqlite3UtfTrans1[] = {
  /* 0xc0..0xdf: 110yyyyy -> low five bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,

  /* 0xe0..0xef: 1110zzzz -> low four bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,

  /* 0xf0..0xf7: 11110uuu -> low three bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,

  /* 0xf8..0xfb -> low two bits, 0xfc..0xfd -> low bit, 0xfe..0xff -> 0 */
  0x00, 0x01, 0x02, 0x03,
  0x00, 0x01,
  0x00, 0x00,
};
62 | |||
63 | |||
/*
** Store the UTF-8 encoding of the value c at zOut and advance zOut past
** the 1 to 4 bytes written.
**
** zOut must be a modifiable byte pointer.  c may be any integer expression:
** it is parenthesized at every use so that operator precedence inside the
** argument cannot alter the result.  It is still evaluated several times,
** so it must not have side effects.
**
** The body is wrapped in do{...}while(0) so that the macro behaves as a
** single statement (safe in an unbraced if/else).
*/
#define WRITE_UTF8(zOut, c) do {                       \
  if( (c)<0x00080 ){                                   \
    *(zOut)++ = ((c)&0xFF);                            \
  }                                                    \
  else if( (c)<0x00800 ){                              \
    *(zOut)++ = 0xC0 + (((c)>>6)&0x1F);                \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }                                                    \
  else if( (c)<0x10000 ){                              \
    *(zOut)++ = 0xE0 + (((c)>>12)&0x0F);               \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else{                                               \
    *(zOut)++ = 0xF0 + (((c)>>18) & 0x07);             \
    *(zOut)++ = 0x80 + (((c)>>12) & 0x3F);             \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }                                                    \
} while( 0 )
83 | |||
/*
** Store the little-endian UTF-16 encoding of the value c at zOut and
** advance zOut past the bytes written: 2 bytes for values up to 0xFFFF,
** otherwise a 4-byte surrogate pair.
**
** zOut must be a modifiable byte pointer.  c is parenthesized at every use
** so an expression argument is encoded correctly, but it is evaluated more
** than once and must not have side effects.
*/
#define WRITE_UTF16LE(zOut, c) do {                                     \
  if( (c)<=0xFFFF ){                                                    \
    *(zOut)++ = ((c)&0x00FF);                                           \
    *(zOut)++ = (((c)>>8)&0x00FF);                                      \
  }else{                                                                \
    *(zOut)++ = ((((c)>>10)&0x003F) + ((((c)-0x10000)>>10)&0x00C0));    \
    *(zOut)++ = (0x00D8 + ((((c)-0x10000)>>18)&0x03));                  \
    *(zOut)++ = ((c)&0x00FF);                                           \
    *(zOut)++ = (0x00DC + (((c)>>8)&0x03));                             \
  }                                                                     \
} while( 0 )
95 | |||
/*
** Store the big-endian UTF-16 encoding of the value c at zOut and advance
** zOut past the bytes written: 2 bytes for values up to 0xFFFF, otherwise
** a 4-byte surrogate pair.
**
** zOut must be a modifiable byte pointer.  c is parenthesized at every use
** so an expression argument is encoded correctly, but it is evaluated more
** than once and must not have side effects.
*/
#define WRITE_UTF16BE(zOut, c) do {                                     \
  if( (c)<=0xFFFF ){                                                    \
    *(zOut)++ = (((c)>>8)&0x00FF);                                      \
    *(zOut)++ = ((c)&0x00FF);                                           \
  }else{                                                                \
    *(zOut)++ = (0x00D8 + ((((c)-0x10000)>>18)&0x03));                  \
    *(zOut)++ = ((((c)>>10)&0x003F) + ((((c)-0x10000)>>10)&0x00C0));    \
    *(zOut)++ = (0x00DC + (((c)>>8)&0x03));                             \
    *(zOut)++ = ((c)&0x00FF);                                           \
  }                                                                     \
} while( 0 )
107 | |||
/*
** Read one little-endian UTF-16 character starting at zIn, store its value
** in c and advance zIn past the bytes consumed (2, or 4 when the first
** code unit lies in the surrogate range 0xD800..0xDFFF).
**
** zIn must be a modifiable byte pointer and c an integer lvalue.  Every
** byte is converted to unsigned char before use so that the macro also
** works when zIn is a plain (possibly signed) char pointer; without this,
** bytes >= 0x80 would be sign-extended and surrogates would go undetected.
**
** No bounds checking is done: the caller must guarantee that the bytes
** read are accessible.
*/
#define READ_UTF16LE(zIn, c) do {                                         \
  (c) = (unsigned char)(*(zIn)++);                                        \
  (c) += ((unsigned char)(*(zIn)++))<<8;                                  \
  if( (c)>=0xD800 && (c)<0xE000 ){                                        \
    int c2 = (unsigned char)(*(zIn)++);                                   \
    c2 += ((unsigned char)(*(zIn)++))<<8;                                 \
    (c) = (c2&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10); \
    if( ((c) & 0xFFFF0000)==0 ) (c) = 0xFFFD;                             \
  }                                                                       \
} while( 0 )
118 | |||
/*
** Read one big-endian UTF-16 character starting at zIn, store its value in
** c and advance zIn past the bytes consumed (2, or 4 when the first code
** unit lies in the surrogate range 0xD800..0xDFFF).
**
** zIn must be a modifiable byte pointer and c an integer lvalue.  Every
** byte is converted to unsigned char before use so that the macro also
** works when zIn is a plain (possibly signed) char pointer; without this,
** bytes >= 0x80 would be sign-extended and surrogates would go undetected.
**
** No bounds checking is done: the caller must guarantee that the bytes
** read are accessible.
*/
#define READ_UTF16BE(zIn, c) do {                                         \
  (c) = ((unsigned char)(*(zIn)++))<<8;                                   \
  (c) += (unsigned char)(*(zIn)++);                                       \
  if( (c)>=0xD800 && (c)<0xE000 ){                                        \
    int c2 = ((unsigned char)(*(zIn)++))<<8;                              \
    c2 += (unsigned char)(*(zIn)++);                                      \
    (c) = (c2&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10); \
    if( ((c) & 0xFFFF0000)==0 ) (c) = 0xFFFD;                             \
  }                                                                       \
} while( 0 )
129 | |||
130 | /* | ||
131 | ** Translate a single UTF-8 character. Return the unicode value. | ||
132 | ** | ||
133 | ** During translation, assume that the byte that zTerm points | ||
134 | ** is a 0x00. | ||
135 | ** | ||
136 | ** Write a pointer to the next unread byte back into *pzNext. | ||
137 | ** | ||
138 | ** Notes On Invalid UTF-8: | ||
139 | ** | ||
140 | ** * This routine never allows a 7-bit character (0x00 through 0x7f) to | ||
141 | ** be encoded as a multi-byte character. Any multi-byte character that | ||
142 | ** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd. | ||
143 | ** | ||
144 | ** * This routine never allows a UTF16 surrogate value to be encoded. | ||
145 | ** If a multi-byte character attempts to encode a value between | ||
146 | ** 0xd800 and 0xe000 then it is rendered as 0xfffd. | ||
147 | ** | ||
148 | ** * Bytes in the range of 0x80 through 0xbf which occur as the first | ||
149 | ** byte of a character are interpreted as single-byte characters | ||
150 | ** and rendered as themselves even though they are technically | ||
151 | ** invalid characters. | ||
152 | ** | ||
153 | ** * This routine accepts an infinite number of different UTF8 encodings | ||
154 | ** for unicode values 0x80 and greater. It do not change over-length | ||
155 | ** encodings to 0xfffd as some systems recommend. | ||
156 | */ | ||
157 | int sqlite3Utf8Read( | ||
158 | const unsigned char *z, /* First byte of UTF-8 character */ | ||
159 | const unsigned char *zTerm, /* Pretend this byte is 0x00 */ | ||
160 | const unsigned char **pzNext /* Write first byte past UTF-8 char here */ | ||
161 | ){ | ||
162 | int c = *(z++); | ||
163 | if( c>=0xc0 ){ | ||
164 | c = sqlite3UtfTrans1[c-0xc0]; | ||
165 | while( z!=zTerm && (*z & 0xc0)==0x80 ){ | ||
166 | c = (c<<6) + (0x3f & *(z++)); | ||
167 | } | ||
168 | if( c<0x80 | ||
169 | || (c&0xFFFFF800)==0xD800 | ||
170 | || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } | ||
171 | } | ||
172 | *pzNext = z; | ||
173 | return c; | ||
174 | } | ||
175 | |||
176 | |||
177 | |||
178 | /* | ||
179 | ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is | ||
180 | ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate(). | ||
181 | */ | ||
182 | /* #define TRANSLATE_TRACE 1 */ | ||
183 | |||
184 | #ifndef SQLITE_OMIT_UTF16 | ||
185 | /* | ||
186 | ** This routine transforms the internal text encoding used by pMem to | ||
187 | ** desiredEnc. It is an error if the string is already of the desired | ||
188 | ** encoding, or if *pMem does not contain a string value. | ||
189 | */ | ||
190 | int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ | ||
191 | unsigned char zShort[NBFS]; /* Temporary short output buffer */ | ||
192 | int len; /* Maximum length of output string in bytes */ | ||
193 | unsigned char *zOut; /* Output buffer */ | ||
194 | unsigned char *zIn; /* Input iterator */ | ||
195 | unsigned char *zTerm; /* End of input */ | ||
196 | unsigned char *z; /* Output iterator */ | ||
197 | unsigned int c; | ||
198 | |||
199 | assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); | ||
200 | assert( pMem->flags&MEM_Str ); | ||
201 | assert( pMem->enc!=desiredEnc ); | ||
202 | assert( pMem->enc!=0 ); | ||
203 | assert( pMem->n>=0 ); | ||
204 | |||
205 | #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) | ||
206 | { | ||
207 | char zBuf[100]; | ||
208 | sqlite3VdbeMemPrettyPrint(pMem, zBuf); | ||
209 | fprintf(stderr, "INPUT: %s\n", zBuf); | ||
210 | } | ||
211 | #endif | ||
212 | |||
213 | /* If the translation is between UTF-16 little and big endian, then | ||
214 | ** all that is required is to swap the byte order. This case is handled | ||
215 | ** differently from the others. | ||
216 | */ | ||
217 | if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){ | ||
218 | u8 temp; | ||
219 | int rc; | ||
220 | rc = sqlite3VdbeMemMakeWriteable(pMem); | ||
221 | if( rc!=SQLITE_OK ){ | ||
222 | assert( rc==SQLITE_NOMEM ); | ||
223 | return SQLITE_NOMEM; | ||
224 | } | ||
225 | zIn = (u8*)pMem->z; | ||
226 | zTerm = &zIn[pMem->n]; | ||
227 | while( zIn<zTerm ){ | ||
228 | temp = *zIn; | ||
229 | *zIn = *(zIn+1); | ||
230 | zIn++; | ||
231 | *zIn++ = temp; | ||
232 | } | ||
233 | pMem->enc = desiredEnc; | ||
234 | goto translate_out; | ||
235 | } | ||
236 | |||
237 | /* Set len to the maximum number of bytes required in the output buffer. */ | ||
238 | if( desiredEnc==SQLITE_UTF8 ){ | ||
239 | /* When converting from UTF-16, the maximum growth results from | ||
240 | ** translating a 2-byte character to a 4-byte UTF-8 character. | ||
241 | ** A single byte is required for the output string | ||
242 | ** nul-terminator. | ||
243 | */ | ||
244 | len = pMem->n * 2 + 1; | ||
245 | }else{ | ||
246 | /* When converting from UTF-8 to UTF-16 the maximum growth is caused | ||
247 | ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16 | ||
248 | ** character. Two bytes are required in the output buffer for the | ||
249 | ** nul-terminator. | ||
250 | */ | ||
251 | len = pMem->n * 2 + 2; | ||
252 | } | ||
253 | |||
254 | /* Set zIn to point at the start of the input buffer and zTerm to point 1 | ||
255 | ** byte past the end. | ||
256 | ** | ||
257 | ** Variable zOut is set to point at the output buffer. This may be space | ||
258 | ** obtained from sqlite3_malloc(), or Mem.zShort, if it large enough and | ||
259 | ** not in use, or the zShort array on the stack (see above). | ||
260 | */ | ||
261 | zIn = (u8*)pMem->z; | ||
262 | zTerm = &zIn[pMem->n]; | ||
263 | if( len>NBFS ){ | ||
264 | zOut = sqlite3DbMallocRaw(pMem->db, len); | ||
265 | if( !zOut ){ | ||
266 | return SQLITE_NOMEM; | ||
267 | } | ||
268 | }else{ | ||
269 | zOut = zShort; | ||
270 | } | ||
271 | z = zOut; | ||
272 | |||
273 | if( pMem->enc==SQLITE_UTF8 ){ | ||
274 | if( desiredEnc==SQLITE_UTF16LE ){ | ||
275 | /* UTF-8 -> UTF-16 Little-endian */ | ||
276 | while( zIn<zTerm ){ | ||
277 | c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); | ||
278 | WRITE_UTF16LE(z, c); | ||
279 | } | ||
280 | }else{ | ||
281 | assert( desiredEnc==SQLITE_UTF16BE ); | ||
282 | /* UTF-8 -> UTF-16 Big-endian */ | ||
283 | while( zIn<zTerm ){ | ||
284 | c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); | ||
285 | WRITE_UTF16BE(z, c); | ||
286 | } | ||
287 | } | ||
288 | pMem->n = z - zOut; | ||
289 | *z++ = 0; | ||
290 | }else{ | ||
291 | assert( desiredEnc==SQLITE_UTF8 ); | ||
292 | if( pMem->enc==SQLITE_UTF16LE ){ | ||
293 | /* UTF-16 Little-endian -> UTF-8 */ | ||
294 | while( zIn<zTerm ){ | ||
295 | READ_UTF16LE(zIn, c); | ||
296 | WRITE_UTF8(z, c); | ||
297 | } | ||
298 | }else{ | ||
299 | /* UTF-16 Little-endian -> UTF-8 */ | ||
300 | while( zIn<zTerm ){ | ||
301 | READ_UTF16BE(zIn, c); | ||
302 | WRITE_UTF8(z, c); | ||
303 | } | ||
304 | } | ||
305 | pMem->n = z - zOut; | ||
306 | } | ||
307 | *z = 0; | ||
308 | assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); | ||
309 | |||
310 | sqlite3VdbeMemRelease(pMem); | ||
311 | pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short); | ||
312 | pMem->enc = desiredEnc; | ||
313 | if( zOut==zShort ){ | ||
314 | memcpy(pMem->zShort, zOut, len); | ||
315 | zOut = (u8*)pMem->zShort; | ||
316 | pMem->flags |= (MEM_Term|MEM_Short); | ||
317 | }else{ | ||
318 | pMem->flags |= (MEM_Term|MEM_Dyn); | ||
319 | } | ||
320 | pMem->z = (char*)zOut; | ||
321 | |||
322 | translate_out: | ||
323 | #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) | ||
324 | { | ||
325 | char zBuf[100]; | ||
326 | sqlite3VdbeMemPrettyPrint(pMem, zBuf); | ||
327 | fprintf(stderr, "OUTPUT: %s\n", zBuf); | ||
328 | } | ||
329 | #endif | ||
330 | return SQLITE_OK; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | ** This routine checks for a byte-order mark at the beginning of the | ||
335 | ** UTF-16 string stored in *pMem. If one is present, it is removed and | ||
336 | ** the encoding of the Mem adjusted. This routine does not do any | ||
337 | ** byte-swapping, it just sets Mem.enc appropriately. | ||
338 | ** | ||
339 | ** The allocation (static, dynamic etc.) and encoding of the Mem may be | ||
340 | ** changed by this function. | ||
341 | */ | ||
342 | int sqlite3VdbeMemHandleBom(Mem *pMem){ | ||
343 | int rc = SQLITE_OK; | ||
344 | u8 bom = 0; | ||
345 | |||
346 | if( pMem->n<0 || pMem->n>1 ){ | ||
347 | u8 b1 = *(u8 *)pMem->z; | ||
348 | u8 b2 = *(((u8 *)pMem->z) + 1); | ||
349 | if( b1==0xFE && b2==0xFF ){ | ||
350 | bom = SQLITE_UTF16BE; | ||
351 | } | ||
352 | if( b1==0xFF && b2==0xFE ){ | ||
353 | bom = SQLITE_UTF16LE; | ||
354 | } | ||
355 | } | ||
356 | |||
357 | if( bom ){ | ||
358 | /* This function is called as soon as a string is stored in a Mem*, | ||
359 | ** from within sqlite3VdbeMemSetStr(). At that point it is not possible | ||
360 | ** for the string to be stored in Mem.zShort, or for it to be stored | ||
361 | ** in dynamic memory with no destructor. | ||
362 | */ | ||
363 | assert( !(pMem->flags&MEM_Short) ); | ||
364 | assert( !(pMem->flags&MEM_Dyn) || pMem->xDel ); | ||
365 | if( pMem->flags & MEM_Dyn ){ | ||
366 | void (*xDel)(void*) = pMem->xDel; | ||
367 | char *z = pMem->z; | ||
368 | pMem->z = 0; | ||
369 | pMem->xDel = 0; | ||
370 | rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, | ||
371 | SQLITE_TRANSIENT); | ||
372 | xDel(z); | ||
373 | }else{ | ||
374 | rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, | ||
375 | SQLITE_TRANSIENT); | ||
376 | } | ||
377 | } | ||
378 | return rc; | ||
379 | } | ||
380 | #endif /* SQLITE_OMIT_UTF16 */ | ||
381 | |||
382 | /* | ||
383 | ** pZ is a UTF-8 encoded unicode string. If nByte is less than zero, | ||
384 | ** return the number of unicode characters in pZ up to (but not including) | ||
385 | ** the first 0x00 byte. If nByte is not less than zero, return the | ||
386 | ** number of unicode characters in the first nByte of pZ (or up to | ||
387 | ** the first 0x00, whichever comes first). | ||
388 | */ | ||
389 | int sqlite3Utf8CharLen(const char *zIn, int nByte){ | ||
390 | int r = 0; | ||
391 | const u8 *z = (const u8*)zIn; | ||
392 | const u8 *zTerm; | ||
393 | if( nByte>=0 ){ | ||
394 | zTerm = &z[nByte]; | ||
395 | }else{ | ||
396 | zTerm = (const u8*)(-1); | ||
397 | } | ||
398 | assert( z<=zTerm ); | ||
399 | while( *z!=0 && z<zTerm ){ | ||
400 | SQLITE_SKIP_UTF8(z); | ||
401 | r++; | ||
402 | } | ||
403 | return r; | ||
404 | } | ||
405 | |||
406 | /* This test function is not currently used by the automated test-suite. | ||
407 | ** Hence it is only available in debug builds. | ||
408 | */ | ||
409 | #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) | ||
410 | /* | ||
411 | ** Translate UTF-8 to UTF-8. | ||
412 | ** | ||
413 | ** This has the effect of making sure that the string is well-formed | ||
414 | ** UTF-8. Miscoded characters are removed. | ||
415 | ** | ||
416 | ** The translation is done in-place (since it is impossible for the | ||
417 | ** correct UTF-8 encoding to be longer than a malformed encoding). | ||
418 | */ | ||
419 | int sqlite3Utf8To8(unsigned char *zIn){ | ||
420 | unsigned char *zOut = zIn; | ||
421 | unsigned char *zStart = zIn; | ||
422 | unsigned char *zTerm; | ||
423 | u32 c; | ||
424 | |||
425 | while( zIn[0] ){ | ||
426 | c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); | ||
427 | if( c!=0xfffd ){ | ||
428 | WRITE_UTF8(zOut, c); | ||
429 | } | ||
430 | } | ||
431 | *zOut = 0; | ||
432 | return zOut - zStart; | ||
433 | } | ||
434 | #endif | ||
435 | |||
436 | #ifndef SQLITE_OMIT_UTF16 | ||
437 | /* | ||
438 | ** Convert a UTF-16 string in the native encoding into a UTF-8 string. | ||
439 | ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must | ||
440 | ** be freed by the calling function. | ||
441 | ** | ||
442 | ** NULL is returned if there is an allocation error. | ||
443 | */ | ||
444 | char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){ | ||
445 | Mem m; | ||
446 | memset(&m, 0, sizeof(m)); | ||
447 | m.db = db; | ||
448 | sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC); | ||
449 | sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8); | ||
450 | if( db->mallocFailed ){ | ||
451 | sqlite3VdbeMemRelease(&m); | ||
452 | m.z = 0; | ||
453 | } | ||
454 | assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); | ||
455 | assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); | ||
456 | return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z); | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | ** pZ is a UTF-16 encoded unicode string. If nChar is less than zero, | ||
461 | ** return the number of bytes up to (but not including), the first pair | ||
462 | ** of consecutive 0x00 bytes in pZ. If nChar is not less than zero, | ||
463 | ** then return the number of bytes in the first nChar unicode characters | ||
464 | ** in pZ (or up until the first pair of 0x00 bytes, whichever comes first). | ||
465 | */ | ||
466 | int sqlite3Utf16ByteLen(const void *zIn, int nChar){ | ||
467 | unsigned int c = 1; | ||
468 | char const *z = zIn; | ||
469 | int n = 0; | ||
470 | if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ | ||
471 | /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here | ||
472 | ** and in other parts of this file means that at one branch will | ||
473 | ** not be covered by coverage testing on any single host. But coverage | ||
474 | ** will be complete if the tests are run on both a little-endian and | ||
475 | ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE | ||
476 | ** macros are constant at compile time the compiler can determine | ||
477 | ** which branch will be followed. It is therefore assumed that no runtime | ||
478 | ** penalty is paid for this "if" statement. | ||
479 | */ | ||
480 | while( c && ((nChar<0) || n<nChar) ){ | ||
481 | READ_UTF16BE(z, c); | ||
482 | n++; | ||
483 | } | ||
484 | }else{ | ||
485 | while( c && ((nChar<0) || n<nChar) ){ | ||
486 | READ_UTF16LE(z, c); | ||
487 | n++; | ||
488 | } | ||
489 | } | ||
490 | return (z-(char const *)zIn)-((c==0)?2:0); | ||
491 | } | ||
492 | |||
493 | #if defined(SQLITE_TEST) | ||
494 | /* | ||
495 | ** This routine is called from the TCL test function "translate_selftest". | ||
496 | ** It checks that the primitives for serializing and deserializing | ||
497 | ** characters in each encoding are inverses of each other. | ||
498 | */ | ||
499 | void sqlite3UtfSelfTest(){ | ||
500 | unsigned int i, t; | ||
501 | unsigned char zBuf[20]; | ||
502 | unsigned char *z; | ||
503 | unsigned char *zTerm; | ||
504 | int n; | ||
505 | unsigned int c; | ||
506 | |||
507 | for(i=0; i<0x00110000; i++){ | ||
508 | z = zBuf; | ||
509 | WRITE_UTF8(z, i); | ||
510 | n = z-zBuf; | ||
511 | z[0] = 0; | ||
512 | zTerm = z; | ||
513 | z = zBuf; | ||
514 | c = sqlite3Utf8Read(z, zTerm, (const u8**)&z); | ||
515 | t = i; | ||
516 | if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; | ||
517 | if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; | ||
518 | assert( c==t ); | ||
519 | assert( (z-zBuf)==n ); | ||
520 | } | ||
521 | for(i=0; i<0x00110000; i++){ | ||
522 | if( i>=0xD800 && i<0xE000 ) continue; | ||
523 | z = zBuf; | ||
524 | WRITE_UTF16LE(z, i); | ||
525 | n = z-zBuf; | ||
526 | z[0] = 0; | ||
527 | z = zBuf; | ||
528 | READ_UTF16LE(z, c); | ||
529 | assert( c==i ); | ||
530 | assert( (z-zBuf)==n ); | ||
531 | } | ||
532 | for(i=0; i<0x00110000; i++){ | ||
533 | if( i>=0xD800 && i<0xE000 ) continue; | ||
534 | z = zBuf; | ||
535 | WRITE_UTF16BE(z, i); | ||
536 | n = z-zBuf; | ||
537 | z[0] = 0; | ||
538 | z = zBuf; | ||
539 | READ_UTF16BE(z, c); | ||
540 | assert( c==i ); | ||
541 | assert( (z-zBuf)==n ); | ||
542 | } | ||
543 | } | ||
544 | #endif /* SQLITE_TEST */ | ||
545 | #endif /* SQLITE_OMIT_UTF16 */ | ||