aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/eina/src/lib/eina_unicode.c
diff options
context:
space:
mode:
author David Walter Seikel 2013-01-13 17:29:19 +1000
committer David Walter Seikel 2013-01-13 17:29:19 +1000
commit 07274513e984f0b5544586c74508ccd16e7dcafa (patch)
tree b32ff2a9136fbc1a4a6a0ed1e4d79cde0f5f16d9 /libraries/eina/src/lib/eina_unicode.c
parent Added Irrlicht 1.8, but without all the Windows binaries. (diff)
downloadSledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.zip
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.gz
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.bz2
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.xz
Remove EFL, since it's been released now.
Diffstat (limited to '')
-rw-r--r-- libraries/eina/src/lib/eina_unicode.c 450
1 files changed, 0 insertions, 450 deletions
diff --git a/libraries/eina/src/lib/eina_unicode.c b/libraries/eina/src/lib/eina_unicode.c
deleted file mode 100644
index 7505906..0000000
--- a/libraries/eina/src/lib/eina_unicode.c
+++ /dev/null
@@ -1,450 +0,0 @@
1/* EINA - EFL data type library
2 * Copyright (C) 2010 Tom Hacohen,
3 * Brett Nash
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library;
17 * if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#ifdef HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include "eina_config.h"
25#include "eina_private.h"
26#include <string.h>
27
28/* undefs EINA_ARG_NONULL() so NULL checks are not compiled out! */
29#include "eina_safety_checks.h"
30#include "eina_unicode.h"
31
32/* FIXME: check if sizeof(wchar_t) == sizeof(Eina_Unicode) if so,
33 * probably better to use the standard functions */
34
35/* Maybe I'm too tired, but this is the only thing that actually worked. */
36const Eina_Unicode _EINA_UNICODE_EMPTY_STRING[1] = {0};
37EAPI const Eina_Unicode *EINA_UNICODE_EMPTY_STRING = _EINA_UNICODE_EMPTY_STRING;
38EAPI int
39eina_unicode_strcmp(const Eina_Unicode *a, const Eina_Unicode *b)
40{
41 EINA_SAFETY_ON_NULL_RETURN_VAL(a, -1);
42 EINA_SAFETY_ON_NULL_RETURN_VAL(b, -1);
43
44 for (; *a && *a == *b; a++, b++)
45 ;
46 if (*a == *b)
47 return 0;
48 else if (*a < *b)
49 return -1;
50 else
51 return 1;
52}
53
54EAPI Eina_Unicode *
55eina_unicode_strcpy(Eina_Unicode *dest, const Eina_Unicode *source)
56{
57 Eina_Unicode *ret = dest;
58
59 EINA_SAFETY_ON_NULL_RETURN_VAL(dest, NULL);
60 EINA_SAFETY_ON_NULL_RETURN_VAL(source, NULL);
61
62 while (*source)
63 *dest++ = *source++;
64 *dest = 0;
65 return ret;
66}
67
68EAPI Eina_Unicode *
69eina_unicode_strncpy(Eina_Unicode *dest, const Eina_Unicode *source, size_t n)
70{
71 Eina_Unicode *ret = dest;
72
73 EINA_SAFETY_ON_NULL_RETURN_VAL(dest, NULL);
74 EINA_SAFETY_ON_NULL_RETURN_VAL(source, NULL);
75
76 for ( ; n && *source ; n--)
77 *dest++ = *source++;
78 for (; n; n--)
79 *dest++ = 0;
80 return ret;
81}
82
83EAPI size_t
84eina_unicode_strlen(const Eina_Unicode *ustr)
85{
86 const Eina_Unicode *end;
87
88 EINA_SAFETY_ON_NULL_RETURN_VAL(ustr, 0);
89
90 for (end = ustr; *end; end++)
91 ;
92 return end - ustr;
93}
94
95EAPI size_t
96eina_unicode_strnlen(const Eina_Unicode *ustr, int n)
97{
98 const Eina_Unicode *end;
99 const Eina_Unicode *last = ustr + n; /* technically not portable ;-) */
100
101 EINA_SAFETY_ON_NULL_RETURN_VAL(ustr, 0);
102
103 for (end = ustr; end < last && *end; end++)
104 ;
105 return end - ustr;
106}
107
108
109
110
111EAPI Eina_Unicode *
112eina_unicode_strndup(const Eina_Unicode *text, size_t n)
113{
114 Eina_Unicode *ustr;
115
116 EINA_SAFETY_ON_NULL_RETURN_VAL(text, NULL);
117
118 ustr = malloc((n + 1) * sizeof(Eina_Unicode));
119 memcpy(ustr, text, n * sizeof(Eina_Unicode));
120 ustr[n] = 0;
121 return ustr;
122}
123
124EAPI Eina_Unicode *
125eina_unicode_strdup(const Eina_Unicode *text)
126{
127 size_t len;
128
129 EINA_SAFETY_ON_NULL_RETURN_VAL(text, NULL);
130
131 len = eina_unicode_strlen(text);
132 return eina_unicode_strndup(text, len);
133}
134
135EAPI Eina_Unicode *
136eina_unicode_strstr(const Eina_Unicode *haystack, const Eina_Unicode *needle)
137{
138 const Eina_Unicode *i, *j;
139
140 EINA_SAFETY_ON_NULL_RETURN_VAL(haystack, NULL);
141 EINA_SAFETY_ON_NULL_RETURN_VAL(needle, NULL);
142
143 for (i = haystack; *i; i++)
144 {
145 haystack = i; /* set this location as the base position */
146 for (j = needle; *j && *i && *j == *i; j++, i++)
147 ;
148
149 if (!*j) /*if we got to the end of j this means we got a full match */
150 {
151 return (Eina_Unicode *)haystack; /* return the new base position */
152 }
153 }
154
155 return NULL;
156}
157
158EAPI Eina_Unicode *
159eina_unicode_escape(const Eina_Unicode *str)
160{
161 Eina_Unicode *s2, *d;
162 const Eina_Unicode *s;
163
164 EINA_SAFETY_ON_NULL_RETURN_VAL(str, NULL);
165
166 s2 = malloc((eina_unicode_strlen(str) * 2) + 1);
167 if (!s2)
168 return NULL;
169
170 for (s = str, d = s2; *s != 0; s++, d++)
171 {
172 if ((*s == ' ') || (*s == '\\') || (*s == '\''))
173 {
174 *d = '\\';
175 d++;
176 }
177
178 *d = *s;
179 }
180 *d = 0;
181 return s2;
182}
183
/* UTF-8 Handling */

/* Upper bound on the bytes one code point may occupy when encoded. */
#define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6
/* The replacement range that will be used for bad utf8 chars. */
#define ERROR_REPLACEMENT_BASE 0xDC80
#define ERROR_REPLACEMENT_END 0xDCFF
/* Byte-classification helpers. Arguments are fully parenthesized so that
 * compound expressions classify correctly; note that IS_INVALID_BYTE still
 * evaluates its argument more than once, so pass side-effect-free values. */
#define IS_INVALID_BYTE(x) (((x) == 192) || ((x) == 193) || ((x) >= 245))
#define IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
192
193EAPI Eina_Unicode
194eina_unicode_utf8_get_next(const char *buf, int *iindex)
195{
196 int ind = *iindex;
197 Eina_Unicode r;
198 unsigned char d;
199
200 EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
201 EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
202
203 /* if this char is the null terminator, exit */
204 if ((d = buf[ind++]) == 0) return 0;
205
206 if ((d & 0x80) == 0)
207 { // 1 byte (7bit) - 0xxxxxxx
208 *iindex = ind;
209 return d;
210 }
211 if ((d & 0xe0) == 0xc0)
212 { // 2 byte (11bit) - 110xxxxx 10xxxxxx
213 r = (d & 0x1f) << 6;
214 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
215 !IS_CONTINUATION_BYTE(d)) goto error;
216 r |= (d & 0x3f);
217 if (r <= 0x7F) goto error;
218 *iindex = ind;
219 return r;
220 }
221 if ((d & 0xf0) == 0xe0)
222 { // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
223 r = (d & 0x0f) << 12;
224 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
225 !IS_CONTINUATION_BYTE(d)) goto error;
226 r |= (d & 0x3f) << 6;
227 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
228 !IS_CONTINUATION_BYTE(d)) goto error;
229 r |= (d & 0x3f);
230 if (r <= 0x7FF) goto error;
231 *iindex = ind;
232 return r;
233 }
234 if ((d & 0xf8) == 0xf0)
235 { // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
236 r = (d & 0x07) << 18;
237 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
238 !IS_CONTINUATION_BYTE(d)) goto error;
239 r |= (d & 0x3f) << 12;
240 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
241 !IS_CONTINUATION_BYTE(d)) goto error;
242 r |= (d & 0x3f) << 6;
243 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
244 !IS_CONTINUATION_BYTE(d)) goto error;
245 r |= (d & 0x3f);
246 if (r <= 0xFFFF) goto error;
247 *iindex = ind;
248 return r;
249 }
250 if ((d & 0xfc) == 0xf8)
251 { // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
252 r = (d & 0x03) << 24;
253 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
254 !IS_CONTINUATION_BYTE(d)) goto error;
255 r |= (d & 0x3f) << 18;
256 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
257 !IS_CONTINUATION_BYTE(d)) goto error;
258 r |= (d & 0x3f) << 12;
259 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
260 !IS_CONTINUATION_BYTE(d)) goto error;
261 r |= (d & 0x3f) << 6;
262 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
263 !IS_CONTINUATION_BYTE(d)) goto error;
264 r |= (d & 0x3f);
265 if (r <= 0x1FFFFF) goto error;
266 *iindex = ind;
267 return r;
268 }
269 if ((d & 0xfe) == 0xfc)
270 { // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
271 r = (d & 0x01) << 30;
272 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
273 !IS_CONTINUATION_BYTE(d)) goto error;
274 r |= (d & 0x3f) << 24;
275 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
276 !IS_CONTINUATION_BYTE(d)) goto error;
277 r |= (d & 0x3f) << 18;
278 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
279 !IS_CONTINUATION_BYTE(d)) goto error;
280 r |= (d & 0x3f) << 12;
281 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
282 !IS_CONTINUATION_BYTE(d)) goto error;
283 r |= (d & 0x3f) << 6;
284 if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
285 !IS_CONTINUATION_BYTE(d)) goto error;
286 r |= (d & 0x3f);
287 if (r <= 0x3FFFFFF) goto error;
288 *iindex = ind;
289 return r;
290 }
291
292/* Gets here where there was an error and we want to replace the char
293 * we just use the invalid unicode codepoints 8 lower bits represent
294 * the original char */
295error:
296 d = buf[*iindex];
297 (*iindex)++;
298 return ERROR_REPLACEMENT_BASE | d;
299}
300
301EAPI Eina_Unicode
302eina_unicode_utf8_get_prev(const char *buf, int *iindex)
303{
304 int r, ind;
305
306 EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
307 EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
308
309 ind = *iindex;
310 /* First obtain the codepoint at iindex */
311 r = eina_unicode_utf8_get_next(buf, &ind);
312
313 /* although when ind == 0 there's no previous char, we still want to get
314 * the current char */
315 if (*iindex <= 0)
316 return r;
317
318 /* Next advance iindex to previous codepoint */
319 ind = *iindex;
320 ind--;
321 while ((ind > 0) && ((buf[ind] & 0xc0) == 0x80))
322 ind--;
323
324 *iindex = ind;
325 return r;
326}
327
328EAPI int
329eina_unicode_utf8_get_len(const char *buf)
330{
331 /* returns the number of utf8 characters (not bytes) in the string */
332 int i = 0, len = 0;
333
334 EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
335
336 while (eina_unicode_utf8_get_next(buf, &i))
337 len++;
338
339 return len;
340}
341
342EAPI Eina_Unicode *
343eina_unicode_utf8_to_unicode(const char *utf, int *_len)
344{
345 /* FIXME: Should optimize! */
346 int len, i;
347 int ind;
348 Eina_Unicode *buf, *uind;
349
350 EINA_SAFETY_ON_NULL_RETURN_VAL(utf, NULL);
351
352 len = eina_unicode_utf8_get_len(utf);
353 if (_len)
354 *_len = len;
355 buf = (Eina_Unicode *) calloc(sizeof(Eina_Unicode), (len + 1));
356 if (!buf) return buf;
357
358 for (i = 0, ind = 0, uind = buf ; i < len ; i++, uind++)
359 {
360 *uind = eina_unicode_utf8_get_next(utf, &ind);
361 }
362
363 return buf;
364}
365
366EAPI char *
367eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
368{
369 char *buf;
370 const Eina_Unicode *uind;
371 char *ind;
372 int ulen, len;
373
374 EINA_SAFETY_ON_NULL_RETURN_VAL(uni, NULL);
375
376 ulen = eina_unicode_strlen(uni);
377 buf = (char *) calloc(ulen + 1, EINA_UNICODE_UTF8_BYTES_PER_CHAR);
378
379 len = 0;
380 for (uind = uni, ind = buf ; *uind ; uind++)
381 {
382 if (*uind <= 0x7F) /* 1 byte char */
383 {
384 *ind++ = *uind;
385 len += 1;
386 }
387 else if (*uind <= 0x7FF) /* 2 byte char */
388 {
389 *ind++ = 0xC0 | (unsigned char) (*uind >> 6);
390 *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
391 len += 2;
392 }
393 else if (*uind <= 0xFFFF) /* 3 byte char */
394 {
395 /* If it's a special replacement codepoint */
396 if (*uind >= ERROR_REPLACEMENT_BASE &&
397 *uind <= ERROR_REPLACEMENT_END)
398 {
399 *ind++ = *uind & 0xFF;
400 len += 1;
401 }
402 else
403 {
404 *ind++ = 0xE0 | (unsigned char) (*uind >> 12);
405 *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
406 *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
407 len += 3;
408 }
409 }
410 else if (*uind <= 0x1FFFFF) /* 4 byte char */
411 {
412 *ind++ = 0xF0 | (unsigned char) ((*uind >> 18) & 0x07);
413 *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
414 *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
415 *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
416 len += 4;
417 }
418 else if (*uind <= 0x3FFFFFF) /* 5 byte char */
419 {
420 *ind++ = 0xF8 | (unsigned char) ((*uind >> 24) & 0x03);
421 *ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
422 *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
423 *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
424 *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
425 len += 5;
426 }
427 else if (*uind <= 0x7FFFFFFF) /* 6 byte char */
428 {
429 *ind++ = 0xFC | (unsigned char) ((*uind >> 30) & 0x01);
430 *ind++ = 0x80 | (unsigned char) ((*uind >> 24) & 0x3F);
431 *ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
432 *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
433 *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
434 *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
435 len += 6;
436 }
437 else /* error */
438 {
439 /* Do something */
440 }
441 }
442 buf = realloc(buf, len + 1);
443 buf[len] = '\0';
444 if (_len)
445 *_len = len;
446 return buf;
447}
448
449
450