diff options
Diffstat (limited to 'libraries/sqlite/unix/sqlite-3.5.1/ext/icu/icu.c')
-rw-r--r-- | libraries/sqlite/unix/sqlite-3.5.1/ext/icu/icu.c | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/libraries/sqlite/unix/sqlite-3.5.1/ext/icu/icu.c b/libraries/sqlite/unix/sqlite-3.5.1/ext/icu/icu.c new file mode 100644 index 0000000..11bb116 --- /dev/null +++ b/libraries/sqlite/unix/sqlite-3.5.1/ext/icu/icu.c | |||
@@ -0,0 +1,499 @@ | |||
1 | /* | ||
2 | ** 2007 May 6 | ||
3 | ** | ||
4 | ** The author disclaims copyright to this source code. In place of | ||
5 | ** a legal notice, here is a blessing: | ||
6 | ** | ||
7 | ** May you do good and not evil. | ||
8 | ** May you find forgiveness for yourself and forgive others. | ||
9 | ** May you share freely, never taking more than you give. | ||
10 | ** | ||
11 | ************************************************************************* | ||
12 | ** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $ | ||
13 | ** | ||
14 | ** This file implements an integration between the ICU library | ||
15 | ** ("International Components for Unicode", an open-source library | ||
16 | ** for handling unicode data) and SQLite. The integration uses | ||
17 | ** ICU to provide the following to SQLite: | ||
18 | ** | ||
19 | ** * An implementation of the SQL regexp() function (and hence REGEXP | ||
20 | ** operator) using the ICU uregex_XX() APIs. | ||
21 | ** | ||
22 | ** * Implementations of the SQL scalar upper() and lower() functions | ||
23 | ** for case mapping. | ||
24 | ** | ||
25 | ** * Integration of ICU and SQLite collation sequences. | |
26 | ** | ||
27 | ** * An implementation of the LIKE operator that uses ICU to | ||
28 | ** provide case-independent matching. | ||
29 | */ | ||
30 | |||
31 | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) | ||
32 | |||
33 | /* Include ICU headers */ | ||
34 | #include <unicode/utypes.h> | ||
35 | #include <unicode/uregex.h> | ||
36 | #include <unicode/ustring.h> | ||
37 | #include <unicode/ucol.h> | ||
38 | |||
39 | #include <assert.h> | ||
40 | |||
41 | #ifndef SQLITE_CORE | ||
42 | #include "sqlite3ext.h" | ||
43 | SQLITE_EXTENSION_INIT1 | ||
44 | #else | ||
45 | #include "sqlite3.h" | ||
46 | #endif | ||
47 | |||
48 | /* | ||
49 | ** Maximum length (in bytes) of the pattern in a LIKE or GLOB | ||
50 | ** operator. | ||
51 | */ | ||
52 | #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH | ||
53 | # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 | ||
54 | #endif | ||
55 | |||
/*
** Thin wrapper that forwards to sqlite3_free().  Unlike sqlite3_free
** itself, this is always a real function (never a macro), so its address
** can be handed to SQLite as a destructor callback.
*/
static void xFree(void *p)
{
  sqlite3_free(p);
}
62 | |||
63 | /* | ||
64 | ** Compare two UTF-8 strings for equality where the first string is | ||
65 | ** a "LIKE" expression. Return true (1) if they are the same and | ||
66 | ** false (0) if they are different. | ||
67 | */ | ||
68 | static int icuLikeCompare( | ||
69 | const uint8_t *zPattern, /* LIKE pattern */ | ||
70 | const uint8_t *zString, /* The UTF-8 string to compare against */ | ||
71 | const UChar32 uEsc /* The escape character */ | ||
72 | ){ | ||
73 | static const int MATCH_ONE = (UChar32)'_'; | ||
74 | static const int MATCH_ALL = (UChar32)'%'; | ||
75 | |||
76 | int iPattern = 0; /* Current byte index in zPattern */ | ||
77 | int iString = 0; /* Current byte index in zString */ | ||
78 | |||
79 | int prevEscape = 0; /* True if the previous character was uEsc */ | ||
80 | |||
81 | while( zPattern[iPattern]!=0 ){ | ||
82 | |||
83 | /* Read (and consume) the next character from the input pattern. */ | ||
84 | UChar32 uPattern; | ||
85 | U8_NEXT_UNSAFE(zPattern, iPattern, uPattern); | ||
86 | assert(uPattern!=0); | ||
87 | |||
88 | /* There are now 4 possibilities: | ||
89 | ** | ||
90 | ** 1. uPattern is an unescaped match-all character "%", | ||
91 | ** 2. uPattern is an unescaped match-one character "_", | ||
92 | ** 3. uPattern is an unescaped escape character, or | ||
93 | ** 4. uPattern is to be handled as an ordinary character | ||
94 | */ | ||
95 | if( !prevEscape && uPattern==MATCH_ALL ){ | ||
96 | /* Case 1. */ | ||
97 | uint8_t c; | ||
98 | |||
99 | /* Skip any MATCH_ALL or MATCH_ONE characters that follow a | ||
100 | ** MATCH_ALL. For each MATCH_ONE, skip one character in the | ||
101 | ** test string. | ||
102 | */ | ||
103 | while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ | ||
104 | if( c==MATCH_ONE ){ | ||
105 | if( zString[iString]==0 ) return 0; | ||
106 | U8_FWD_1_UNSAFE(zString, iString); | ||
107 | } | ||
108 | iPattern++; | ||
109 | } | ||
110 | |||
111 | if( zPattern[iPattern]==0 ) return 1; | ||
112 | |||
113 | while( zString[iString] ){ | ||
114 | if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ | ||
115 | return 1; | ||
116 | } | ||
117 | U8_FWD_1_UNSAFE(zString, iString); | ||
118 | } | ||
119 | return 0; | ||
120 | |||
121 | }else if( !prevEscape && uPattern==MATCH_ONE ){ | ||
122 | /* Case 2. */ | ||
123 | if( zString[iString]==0 ) return 0; | ||
124 | U8_FWD_1_UNSAFE(zString, iString); | ||
125 | |||
126 | }else if( !prevEscape && uPattern==uEsc){ | ||
127 | /* Case 3. */ | ||
128 | prevEscape = 1; | ||
129 | |||
130 | }else{ | ||
131 | /* Case 4. */ | ||
132 | UChar32 uString; | ||
133 | U8_NEXT_UNSAFE(zString, iString, uString); | ||
134 | uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); | ||
135 | uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); | ||
136 | if( uString!=uPattern ){ | ||
137 | return 0; | ||
138 | } | ||
139 | prevEscape = 0; | ||
140 | } | ||
141 | } | ||
142 | |||
143 | return zString[iString]==0; | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | ** Implementation of the like() SQL function. This function implements | ||
148 | ** the build-in LIKE operator. The first argument to the function is the | ||
149 | ** pattern and the second argument is the string. So, the SQL statements: | ||
150 | ** | ||
151 | ** A LIKE B | ||
152 | ** | ||
153 | ** is implemented as like(B, A). If there is an escape character E, | ||
154 | ** | ||
155 | ** A LIKE B ESCAPE E | ||
156 | ** | ||
157 | ** is mapped to like(B, A, E). | ||
158 | */ | ||
159 | static void icuLikeFunc( | ||
160 | sqlite3_context *context, | ||
161 | int argc, | ||
162 | sqlite3_value **argv | ||
163 | ){ | ||
164 | const unsigned char *zA = sqlite3_value_text(argv[0]); | ||
165 | const unsigned char *zB = sqlite3_value_text(argv[1]); | ||
166 | UChar32 uEsc = 0; | ||
167 | |||
168 | /* Limit the length of the LIKE or GLOB pattern to avoid problems | ||
169 | ** of deep recursion and N*N behavior in patternCompare(). | ||
170 | */ | ||
171 | if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ | ||
172 | sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); | ||
173 | return; | ||
174 | } | ||
175 | |||
176 | |||
177 | if( argc==3 ){ | ||
178 | /* The escape character string must consist of a single UTF-8 character. | ||
179 | ** Otherwise, return an error. | ||
180 | */ | ||
181 | int nE= sqlite3_value_bytes(argv[2]); | ||
182 | const unsigned char *zE = sqlite3_value_text(argv[2]); | ||
183 | int i = 0; | ||
184 | if( zE==0 ) return; | ||
185 | U8_NEXT(zE, i, nE, uEsc); | ||
186 | if( i!=nE){ | ||
187 | sqlite3_result_error(context, | ||
188 | "ESCAPE expression must be a single character", -1); | ||
189 | return; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | if( zA && zB ){ | ||
194 | sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | ** This function is called when an ICU function called from within | ||
200 | ** the implementation of an SQL scalar function returns an error. | ||
201 | ** | ||
202 | ** The scalar function context passed as the first argument is | ||
203 | ** loaded with an error message based on the following two args. | ||
204 | */ | ||
205 | static void icuFunctionError( | ||
206 | sqlite3_context *pCtx, /* SQLite scalar function context */ | ||
207 | const char *zName, /* Name of ICU function that failed */ | ||
208 | UErrorCode e /* Error code returned by ICU function */ | ||
209 | ){ | ||
210 | char zBuf[128]; | ||
211 | sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); | ||
212 | zBuf[127] = '\0'; | ||
213 | sqlite3_result_error(pCtx, zBuf, -1); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | ** Function to delete compiled regexp objects. Registered as | ||
218 | ** a destructor function with sqlite3_set_auxdata(). | ||
219 | */ | ||
220 | static void icuRegexpDelete(void *p){ | ||
221 | URegularExpression *pExpr = (URegularExpression *)p; | ||
222 | uregex_close(pExpr); | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | ** Implementation of SQLite REGEXP operator. This scalar function takes | ||
227 | ** two arguments. The first is a regular expression pattern to compile | ||
228 | ** the second is a string to match against that pattern. If either | ||
229 | ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result | ||
230 | ** is 1 if the string matches the pattern, or 0 otherwise. | ||
231 | ** | ||
232 | ** SQLite maps the regexp() function to the regexp() operator such | ||
233 | ** that the following two are equivalent: | ||
234 | ** | ||
235 | ** zString REGEXP zPattern | ||
236 | ** regexp(zPattern, zString) | ||
237 | ** | ||
238 | ** Uses the following ICU regexp APIs: | ||
239 | ** | ||
240 | ** uregex_open() | ||
241 | ** uregex_matches() | ||
242 | ** uregex_close() | ||
243 | */ | ||
244 | static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ | ||
245 | UErrorCode status = U_ZERO_ERROR; | ||
246 | URegularExpression *pExpr; | ||
247 | UBool res; | ||
248 | const UChar *zString = sqlite3_value_text16(apArg[1]); | ||
249 | |||
250 | /* If the left hand side of the regexp operator is NULL, | ||
251 | ** then the result is also NULL. | ||
252 | */ | ||
253 | if( !zString ){ | ||
254 | return; | ||
255 | } | ||
256 | |||
257 | pExpr = sqlite3_get_auxdata(p, 0); | ||
258 | if( !pExpr ){ | ||
259 | const UChar *zPattern = sqlite3_value_text16(apArg[0]); | ||
260 | if( !zPattern ){ | ||
261 | return; | ||
262 | } | ||
263 | pExpr = uregex_open(zPattern, -1, 0, 0, &status); | ||
264 | |||
265 | if( U_SUCCESS(status) ){ | ||
266 | sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); | ||
267 | }else{ | ||
268 | assert(!pExpr); | ||
269 | icuFunctionError(p, "uregex_open", status); | ||
270 | return; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* Configure the text that the regular expression operates on. */ | ||
275 | uregex_setText(pExpr, zString, -1, &status); | ||
276 | if( !U_SUCCESS(status) ){ | ||
277 | icuFunctionError(p, "uregex_setText", status); | ||
278 | return; | ||
279 | } | ||
280 | |||
281 | /* Attempt the match */ | ||
282 | res = uregex_matches(pExpr, 0, &status); | ||
283 | if( !U_SUCCESS(status) ){ | ||
284 | icuFunctionError(p, "uregex_matches", status); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | /* Set the text that the regular expression operates on to a NULL | ||
289 | ** pointer. This is not really necessary, but it is tidier than | ||
290 | ** leaving the regular expression object configured with an invalid | ||
291 | ** pointer after this function returns. | ||
292 | */ | ||
293 | uregex_setText(pExpr, 0, 0, &status); | ||
294 | |||
295 | /* Return 1 or 0. */ | ||
296 | sqlite3_result_int(p, res ? 1 : 0); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | ** Implementations of scalar functions for case mapping - upper() and | ||
301 | ** lower(). Function upper() converts it's input to upper-case (ABC). | ||
302 | ** Function lower() converts to lower-case (abc). | ||
303 | ** | ||
304 | ** ICU provides two types of case mapping, "general" case mapping and | ||
305 | ** "language specific". Refer to ICU documentation for the differences | ||
306 | ** between the two. | ||
307 | ** | ||
308 | ** To utilise "general" case mapping, the upper() or lower() scalar | ||
309 | ** functions are invoked with one argument: | ||
310 | ** | ||
311 | ** upper('ABC') -> 'abc' | ||
312 | ** lower('abc') -> 'ABC' | ||
313 | ** | ||
314 | ** To access ICU "language specific" case mapping, upper() or lower() | ||
315 | ** should be invoked with two arguments. The second argument is the name | ||
316 | ** of the locale to use. Passing an empty string ("") or SQL NULL value | ||
317 | ** as the second argument is the same as invoking the 1 argument version | ||
318 | ** of upper() or lower(). | ||
319 | ** | ||
320 | ** lower('I', 'en_us') -> 'i' | ||
321 | ** lower('I', 'tr_tr') -> 'ı' (small dotless i) | ||
322 | ** | ||
323 | ** http://www.icu-project.org/userguide/posix.html#case_mappings | ||
324 | */ | ||
325 | static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ | ||
326 | const UChar *zInput; | ||
327 | UChar *zOutput; | ||
328 | int nInput; | ||
329 | int nOutput; | ||
330 | |||
331 | UErrorCode status = U_ZERO_ERROR; | ||
332 | const char *zLocale = 0; | ||
333 | |||
334 | assert(nArg==1 || nArg==2); | ||
335 | if( nArg==2 ){ | ||
336 | zLocale = (const char *)sqlite3_value_text(apArg[1]); | ||
337 | } | ||
338 | |||
339 | zInput = sqlite3_value_text16(apArg[0]); | ||
340 | if( !zInput ){ | ||
341 | return; | ||
342 | } | ||
343 | nInput = sqlite3_value_bytes16(apArg[0]); | ||
344 | |||
345 | nOutput = nInput * 2 + 2; | ||
346 | zOutput = sqlite3_malloc(nOutput); | ||
347 | if( !zOutput ){ | ||
348 | return; | ||
349 | } | ||
350 | |||
351 | if( sqlite3_user_data(p) ){ | ||
352 | u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); | ||
353 | }else{ | ||
354 | u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); | ||
355 | } | ||
356 | |||
357 | if( !U_SUCCESS(status) ){ | ||
358 | icuFunctionError(p, "u_strToLower()/u_strToUpper", status); | ||
359 | return; | ||
360 | } | ||
361 | |||
362 | sqlite3_result_text16(p, zOutput, -1, xFree); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | ** Collation sequence destructor function. The pCtx argument points to | ||
367 | ** a UCollator structure previously allocated using ucol_open(). | ||
368 | */ | ||
369 | static void icuCollationDel(void *pCtx){ | ||
370 | UCollator *p = (UCollator *)pCtx; | ||
371 | ucol_close(p); | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | ** Collation sequence comparison function. The pCtx argument points to | ||
376 | ** a UCollator structure previously allocated using ucol_open(). | ||
377 | */ | ||
378 | static int icuCollationColl( | ||
379 | void *pCtx, | ||
380 | int nLeft, | ||
381 | const void *zLeft, | ||
382 | int nRight, | ||
383 | const void *zRight | ||
384 | ){ | ||
385 | UCollationResult res; | ||
386 | UCollator *p = (UCollator *)pCtx; | ||
387 | res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); | ||
388 | switch( res ){ | ||
389 | case UCOL_LESS: return -1; | ||
390 | case UCOL_GREATER: return +1; | ||
391 | case UCOL_EQUAL: return 0; | ||
392 | } | ||
393 | assert(!"Unexpected return value from ucol_strcoll()"); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | ** Implementation of the scalar function icu_load_collation(). | ||
399 | ** | ||
400 | ** This scalar function is used to add ICU collation based collation | ||
401 | ** types to an SQLite database connection. It is intended to be called | ||
402 | ** as follows: | ||
403 | ** | ||
404 | ** SELECT icu_load_collation(<locale>, <collation-name>); | ||
405 | ** | ||
406 | ** Where <locale> is a string containing an ICU locale identifier (i.e. | ||
407 | ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the | ||
408 | ** collation sequence to create. | ||
409 | */ | ||
410 | static void icuLoadCollation( | ||
411 | sqlite3_context *p, | ||
412 | int nArg, | ||
413 | sqlite3_value **apArg | ||
414 | ){ | ||
415 | sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); | ||
416 | UErrorCode status = U_ZERO_ERROR; | ||
417 | const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ | ||
418 | const char *zName; /* SQL Collation sequence name (eg. "japanese") */ | ||
419 | UCollator *pUCollator; /* ICU library collation object */ | ||
420 | int rc; /* Return code from sqlite3_create_collation_x() */ | ||
421 | |||
422 | assert(nArg==2); | ||
423 | zLocale = (const char *)sqlite3_value_text(apArg[0]); | ||
424 | zName = (const char *)sqlite3_value_text(apArg[1]); | ||
425 | |||
426 | if( !zLocale || !zName ){ | ||
427 | return; | ||
428 | } | ||
429 | |||
430 | pUCollator = ucol_open(zLocale, &status); | ||
431 | if( !U_SUCCESS(status) ){ | ||
432 | icuFunctionError(p, "ucol_open", status); | ||
433 | return; | ||
434 | } | ||
435 | assert(p); | ||
436 | |||
437 | rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, | ||
438 | icuCollationColl, icuCollationDel | ||
439 | ); | ||
440 | if( rc!=SQLITE_OK ){ | ||
441 | ucol_close(pUCollator); | ||
442 | sqlite3_result_error(p, "Error registering collation function", -1); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | ** Register the ICU extension functions with database db. | ||
448 | */ | ||
449 | int sqlite3IcuInit(sqlite3 *db){ | ||
450 | struct IcuScalar { | ||
451 | const char *zName; /* Function name */ | ||
452 | int nArg; /* Number of arguments */ | ||
453 | int enc; /* Optimal text encoding */ | ||
454 | void *pContext; /* sqlite3_user_data() context */ | ||
455 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**); | ||
456 | } scalars[] = { | ||
457 | {"regexp",-1, SQLITE_ANY, 0, icuRegexpFunc}, | ||
458 | |||
459 | {"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16}, | ||
460 | {"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16}, | ||
461 | {"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16}, | ||
462 | {"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16}, | ||
463 | |||
464 | {"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16}, | ||
465 | {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, | ||
466 | {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, | ||
467 | {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, | ||
468 | |||
469 | {"like", 2, SQLITE_UTF8, 0, icuLikeFunc}, | ||
470 | {"like", 3, SQLITE_UTF8, 0, icuLikeFunc}, | ||
471 | |||
472 | {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, | ||
473 | }; | ||
474 | |||
475 | int rc = SQLITE_OK; | ||
476 | int i; | ||
477 | |||
478 | for(i=0; rc==SQLITE_OK && i<(sizeof(scalars)/sizeof(struct IcuScalar)); i++){ | ||
479 | struct IcuScalar *p = &scalars[i]; | ||
480 | rc = sqlite3_create_function( | ||
481 | db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0 | ||
482 | ); | ||
483 | } | ||
484 | |||
485 | return rc; | ||
486 | } | ||
487 | |||
488 | #if !SQLITE_CORE | ||
489 | int sqlite3_extension_init( | ||
490 | sqlite3 *db, | ||
491 | char **pzErrMsg, | ||
492 | const sqlite3_api_routines *pApi | ||
493 | ){ | ||
494 | SQLITE_EXTENSION_INIT2(pApi) | ||
495 | return sqlite3IcuInit(db); | ||
496 | } | ||
497 | #endif | ||
498 | |||
499 | #endif | ||