1 files changed, 405 insertions, 0 deletions
diff --git a/libraries/eina/src/lib/eina_unicode.c b/libraries/eina/src/lib/eina_unicode.c
new file mode 100644
index 0000000..342e3cb
--- /dev/null
+++ b/libraries/eina/src/lib/eina_unicode.c
@@ -0,0 +1,405 @@
+/* EINA - EFL data type library
+ * Copyright (C) 2010 Tom Hacohen,
+ *              Brett Nash
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library;
+ * if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <Eina.h>
+#include "eina_unicode.h"
+/* FIXME: check whether sizeof(wchar_t) == sizeof(Eina_Unicode); if so,
+ * it is probably better to use the standard (wchar_t) functions. */
+/* The exported empty string is a pointer to a separately defined array;
+ * this was the only form that actually worked. */
+const Eina_Unicode _EINA_UNICODE_EMPTY_STRING[1] = {0}; /* backing storage: a single zero element, i.e. just the terminator */
+EAPI const Eina_Unicode *EINA_UNICODE_EMPTY_STRING = _EINA_UNICODE_EMPTY_STRING; /* exported pointer to the empty string above */
+/**
+ * Compare two zero-terminated Eina_Unicode strings element by element.
+ *
+ * @param a First string.
+ * @param b Second string.
+ * @return 0 when the strings are equal, -1 when the first differing
+ *         element of @p a is smaller than the one of @p b, 1 otherwise.
+ */
+EAPI int
+eina_unicode_strcmp(const Eina_Unicode *a, const Eina_Unicode *b)
+{
+   /* Advance in lockstep until the strings differ or a is exhausted. */
+   while (*a && (*a == *b))
+     {
+        a++;
+        b++;
+     }
+
+   if (*a < *b) return -1;
+   if (*a > *b) return 1;
+   return 0;
+}
+/**
+ * Copy a zero-terminated Eina_Unicode string, terminator included.
+ *
+ * @param dest   Destination buffer; the caller must provide room for the
+ *               whole of @p source plus the terminator.
+ * @param source String to copy.
+ * @return @p dest.
+ */
+EAPI Eina_Unicode *
+eina_unicode_strcpy(Eina_Unicode *dest, const Eina_Unicode *source)
+{
+   Eina_Unicode *out;
+
+   for (out = dest; *source; out++, source++)
+     *out = *source;
+   *out = 0;
+
+   return dest;
+}
+/**
+ * Copy at most @p n elements of a zero-terminated Eina_Unicode string.
+ *
+ * When @p source is shorter than @p n the remainder of @p dest is filled
+ * with zeros. When it is not, exactly @p n elements are written and no
+ * terminator is appended.
+ *
+ * @param dest   Destination buffer with room for @p n elements.
+ * @param source String to copy from.
+ * @param n      Number of elements to write to @p dest.
+ * @return @p dest.
+ */
+EAPI Eina_Unicode *
+eina_unicode_strncpy(Eina_Unicode *dest, const Eina_Unicode *source, size_t n)
+{
+   size_t i = 0;
+
+   /* Copy until either the budget or the source runs out. */
+   while ((i < n) && source[i])
+     {
+        dest[i] = source[i];
+        i++;
+     }
+
+   /* Zero-pad whatever is left of the budget. */
+   while (i < n)
+     dest[i++] = 0;
+
+   return dest;
+}
+/**
+ * Count the elements of a zero-terminated Eina_Unicode string.
+ *
+ * @param ustr String to measure.
+ * @return Number of elements before the terminator.
+ */
+EAPI size_t
+eina_unicode_strlen(const Eina_Unicode *ustr)
+{
+   size_t count = 0;
+
+   while (ustr[count])
+     count++;
+
+   return count;
+}
+/**
+ * Count the elements of an Eina_Unicode string, looking at no more than
+ * @p n of them.
+ *
+ * @param ustr String to measure.
+ * @param n    Maximum number of elements to examine; values <= 0 yield 0.
+ * @return The number of elements before the terminator, capped at @p n.
+ */
+EAPI size_t
+eina_unicode_strnlen(const Eina_Unicode *ustr, int n)
+{
+   int len = 0;
+
+   /* Index-based scan: never forms the pointer ustr + n, which would be
+    * undefined when n is negative or larger than the underlying array. */
+   while ((len < n) && ustr[len])
+     len++;
+
+   return (size_t)len;
+}
+/**
+ * Duplicate the first @p n elements of an Eina_Unicode buffer into a newly
+ * allocated, zero-terminated string.
+ *
+ * Exactly @p n elements are copied, without looking for a terminator in
+ * @p text, so @p text must hold at least @p n readable elements.
+ *
+ * @param text Buffer to copy from.
+ * @param n    Number of elements to copy.
+ * @return A malloc()ed string of @p n elements plus terminator that the
+ *         caller must free(), or NULL when the size would overflow or the
+ *         allocation fails.
+ */
+EAPI Eina_Unicode *
+eina_unicode_strndup(const Eina_Unicode *text, size_t n)
+{
+   Eina_Unicode *ustr;
+
+   /* (n + 1) * sizeof(Eina_Unicode) must not wrap around size_t. */
+   if (n >= ((size_t)-1) / sizeof(Eina_Unicode))
+     return NULL;
+
+   ustr = malloc((n + 1) * sizeof(Eina_Unicode));
+   if (!ustr)
+     return NULL;
+
+   memcpy(ustr, text, n * sizeof(Eina_Unicode));
+   ustr[n] = 0;
+   return ustr;
+}
+/**
+ * Duplicate a zero-terminated Eina_Unicode string.
+ *
+ * @param text String to duplicate.
+ * @return Whatever eina_unicode_strndup() returns for the full length of
+ *         @p text.
+ */
+EAPI Eina_Unicode *
+eina_unicode_strdup(const Eina_Unicode *text)
+{
+   return eina_unicode_strndup(text, eina_unicode_strlen(text));
+}
+/**
+ * Find the first occurrence of @p needle inside @p haystack.
+ *
+ * @param haystack Zero-terminated string to search in.
+ * @param needle   Zero-terminated string to search for.
+ * @return Pointer to the start of the first match inside @p haystack, or
+ *         NULL when there is none. An empty @p haystack always yields
+ *         NULL; an empty @p needle matches at the start of a non-empty
+ *         @p haystack.
+ */
+EAPI Eina_Unicode *
+eina_unicode_strstr(const Eina_Unicode *haystack, const Eina_Unicode *needle)
+{
+   const Eina_Unicode *base, *h, *n;
+
+   for (base = haystack; *base; base++)
+     {
+        /* Compare with separate cursors so that a failed partial match
+         * neither skips candidate positions nor moves the outer cursor
+         * past the terminator. */
+        for (h = base, n = needle; *n && (*h == *n); h++, n++)
+          ;
+
+        if (!*n) /* consumed the whole needle: full match at base */
+          return (Eina_Unicode *)base;
+     }
+
+   return NULL;
+}
+/**
+ * Create a copy of @p str in which every space, backslash and single
+ * quote is preceded by a backslash.
+ *
+ * @param str Zero-terminated string to escape.
+ * @return A newly malloc()ed, zero-terminated string that the caller must
+ *         free(), or NULL when the size would overflow or the allocation
+ *         fails.
+ */
+EAPI Eina_Unicode *
+eina_unicode_escape(const Eina_Unicode *str)
+{
+   Eina_Unicode *s2, *d;
+   const Eina_Unicode *s;
+   size_t len;
+
+   len = eina_unicode_strlen(str);
+
+   /* Worst case every element gets escaped: 2 * len elements plus the
+    * terminator. Refuse lengths whose byte size would wrap size_t. */
+   if (len > ((((size_t)-1) / sizeof(Eina_Unicode)) - 1) / 2)
+     return NULL;
+
+   /* The size is in elements, so it has to be scaled to bytes. */
+   s2 = malloc(((len * 2) + 1) * sizeof(Eina_Unicode));
+   if (!s2)
+     return NULL;
+
+   for (s = str, d = s2; *s != 0; s++, d++)
+     {
+        if ((*s == ' ') || (*s == '\\') || (*s == '\''))
+          {
+             *d = '\\';
+             d++;
+          }
+        *d = *s;
+     }
+   *d = 0;
+   return s2;
+}
+/* UTF-8 Handling */
+/* Upper bound on the number of UTF-8 bytes emitted per code point. */
+#define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6
+/* The replacement range that will be used for bad utf8 chars. */
+#define ERROR_REPLACEMENT_BASE  0xDC80
+#define ERROR_REPLACEMENT_END   0xDCFF
+/* True for byte values 192, 193 and 245..255. Evaluates x more than once. */
+#define IS_INVALID_BYTE(x)      (((x) == 192) || ((x) == 193) || ((x) >= 245))
+/* True when the two top bits of x are 10, i.e. a UTF-8 continuation byte. */
+#define IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
+/**
+ * Decode the UTF-8 sequence that starts at byte offset *iindex of @p buf
+ * and move *iindex past it.
+ *
+ * Sequences of 1 to 6 bytes are accepted. When the sequence is malformed
+ * (bad lead byte, missing or bad continuation byte, overlong encoding) a
+ * single byte is consumed and returned as ERROR_REPLACEMENT_BASE ORed
+ * with that byte's value.
+ *
+ * @param buf    Zero-terminated UTF-8 buffer.
+ * @param iindex In: byte offset to decode at. Out: offset of the next
+ *               sequence; left untouched when the terminator is hit.
+ * @return The decoded code point, a replacement code point for invalid
+ *         input, or 0 when *iindex points at the terminator.
+ */
+EAPI Eina_Unicode
+eina_unicode_utf8_get_next(const char *buf, int *iindex)
+{
+   int pos = *iindex;
+   int trailing, k;
+   Eina_Unicode code, overlong_max;
+   unsigned char byte;
+
+   /* if this char is the null terminator, exit */
+   byte = buf[pos++];
+   if (byte == 0) return 0;
+
+   if ((byte & 0x80) == 0)
+     { /* 1 byte (7bit) - 0xxxxxxx */
+        *iindex = pos;
+        return byte;
+     }
+
+   /* Classify the lead byte: number of continuation bytes to expect, the
+    * payload bits it carries, and the largest value that a shorter
+    * encoding could already represent (used to reject overlong forms). */
+   if ((byte & 0xe0) == 0xc0)
+     { /* 2 byte (11bit) - 110xxxxx */
+        trailing = 1;
+        code = byte & 0x1f;
+        overlong_max = 0x7F;
+     }
+   else if ((byte & 0xf0) == 0xe0)
+     { /* 3 byte (16bit) - 1110xxxx */
+        trailing = 2;
+        code = byte & 0x0f;
+        overlong_max = 0x7FF;
+     }
+   else if ((byte & 0xf8) == 0xf0)
+     { /* 4 byte (21bit) - 11110xxx */
+        trailing = 3;
+        code = byte & 0x07;
+        overlong_max = 0xFFFF;
+     }
+   else if ((byte & 0xfc) == 0xf8)
+     { /* 5 byte (26bit) - 111110xx */
+        trailing = 4;
+        code = byte & 0x03;
+        overlong_max = 0x1FFFFF;
+     }
+   else if ((byte & 0xfe) == 0xfc)
+     { /* 6 byte (31bit) - 1111110x */
+        trailing = 5;
+        code = byte & 0x01;
+        overlong_max = 0x3FFFFFF;
+     }
+   else
+     goto error;
+
+   /* Fold in the continuation bytes (10xxxxxx), six payload bits each. */
+   for (k = 0; k < trailing; k++)
+     {
+        byte = buf[pos++];
+        if ((byte == 0) || IS_INVALID_BYTE(byte) ||
+            !IS_CONTINUATION_BYTE(byte)) goto error;
+        code = (code << 6) | (byte & 0x3f);
+     }
+
+   if (code <= overlong_max) goto error;
+   *iindex = pos;
+   return code;
+
+/* Reached on any decoding error: consume exactly one byte and map it into
+ * the replacement range, whose low 8 bits carry the original byte. */
+error:
+   byte = buf[*iindex];
+   (*iindex)++;
+   return ERROR_REPLACEMENT_BASE | byte;
+}
+/**
+ * Decode the code point at byte offset *iindex of @p buf, then move
+ * *iindex back to the start of the preceding sequence.
+ *
+ * @param buf    Zero-terminated UTF-8 buffer.
+ * @param iindex In: byte offset of the code point to decode. Out: offset
+ *               of the previous code point; left unchanged when the input
+ *               offset is <= 0.
+ * @return The code point decoded at the original *iindex.
+ */
+EAPI Eina_Unicode
+eina_unicode_utf8_get_prev(const char *buf, int *iindex)
+{
+   int pos = *iindex;
+   Eina_Unicode current;
+
+   /* Decode the current code point with a scratch offset. */
+   current = eina_unicode_utf8_get_next(buf, &pos);
+
+   /* At offset 0 there is nothing before us; the current code point is
+    * still reported. Otherwise step back over continuation bytes until a
+    * non-continuation byte (or offset 0) is reached. */
+   if (*iindex > 0)
+     {
+        pos = *iindex - 1;
+        while ((pos > 0) && IS_CONTINUATION_BYTE(buf[pos]))
+          pos--;
+        *iindex = pos;
+     }
+
+   return current;
+}
+/**
+ * Count the code points (not bytes) of a zero-terminated UTF-8 string,
+ * as decoded by eina_unicode_utf8_get_next().
+ *
+ * @param buf Zero-terminated UTF-8 buffer.
+ * @return Number of code points before the terminator.
+ */
+EAPI int
+eina_unicode_utf8_get_len(const char *buf)
+{
+   int pos = 0;
+   int count;
+
+   /* The decoder returns 0 only when it reaches the terminator. */
+   for (count = 0; eina_unicode_utf8_get_next(buf, &pos); count++)
+     ;
+
+   return count;
+}
+/**
+ * Convert a zero-terminated UTF-8 string into a newly allocated,
+ * zero-terminated Eina_Unicode string.
+ *
+ * @param utf  UTF-8 input.
+ * @param _len When not NULL, receives the number of code points; it is
+ *             written before the allocation, so also when NULL is
+ *             returned.
+ * @return The converted string, to be free()d by the caller, or NULL when
+ *         the allocation fails.
+ */
+EAPI Eina_Unicode *
+eina_unicode_utf8_to_unicode(const char *utf, int *_len)
+{
+   /* FIXME: Should optimize! */
+   int count, k;
+   int pos = 0;
+   Eina_Unicode *out;
+
+   count = eina_unicode_utf8_get_len(utf);
+   if (_len)
+     *_len = count;
+
+   /* Zero-filled, so the terminator slot needs no explicit store. */
+   out = calloc(count + 1, sizeof(Eina_Unicode));
+   if (!out) return NULL;
+
+   for (k = 0; k < count; k++)
+     out[k] = eina_unicode_utf8_get_next(utf, &pos);
+
+   return out;
+}
+/**
+ * Convert a zero-terminated Eina_Unicode string into a newly allocated,
+ * zero-terminated UTF-8 string.
+ *
+ * Code points in the ERROR_REPLACEMENT_BASE..ERROR_REPLACEMENT_END range
+ * are written back as the single raw byte stored in their low 8 bits.
+ * Values above 0x7FFFFFFF are skipped.
+ *
+ * @param uni  String to convert.
+ * @param _len When not NULL, receives the length of the result in bytes,
+ *             terminator excluded (0 when NULL is returned).
+ * @return The UTF-8 string, to be free()d by the caller, or NULL when the
+ *         allocation fails.
+ */
+EAPI char *
+eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
+{
+   char *buf, *shrunk;
+   const Eina_Unicode *uind;
+   char *ind;
+   int ulen, len;
+
+   ulen = eina_unicode_strlen(uni);
+
+   /* Worst case: every code point takes the maximum number of bytes. */
+   buf = calloc(ulen + 1, EINA_UNICODE_UTF8_BYTES_PER_CHAR);
+   if (!buf)
+     {
+        if (_len)
+           *_len = 0;
+        return NULL;
+     }
+
+   for (uind = uni, ind = buf ; *uind ; uind++)
+     {
+        if (*uind <= 0x7F) /* 1 byte char */
+          {
+             *ind++ = *uind;
+          }
+        else if (*uind <= 0x7FF) /* 2 byte char */
+          {
+             *ind++ = 0xC0 | (unsigned char) (*uind >> 6);
+             *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
+          }
+        else if (*uind <= 0xFFFF) /* 3 byte char */
+          {
+             /* If it's a special replacement codepoint, emit the raw
+              * byte it stands for. */
+             if (*uind >= ERROR_REPLACEMENT_BASE &&
+                 *uind <= ERROR_REPLACEMENT_END)
+               {
+                  *ind++ = *uind & 0xFF;
+               }
+             else
+               {
+                  *ind++ = 0xE0 | (unsigned char) (*uind >> 12);
+                  *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
+                  *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
+               }
+          }
+        else if (*uind <= 0x1FFFFF) /* 4 byte char */
+          {
+             *ind++ = 0xF0 | (unsigned char) ((*uind >> 18) & 0x07);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
+          }
+        else if (*uind <= 0x3FFFFFF) /* 5 byte char */
+          {
+             *ind++ = 0xF8 | (unsigned char) ((*uind >> 24) & 0x03);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
+          }
+        else if (*uind <= 0x7FFFFFFF) /* 6 byte char */
+          {
+             *ind++ = 0xFC | (unsigned char) ((*uind >> 30) & 0x01);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 24) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
+             *ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
+          }
+        /* else: not encodable; the code point is skipped. */
+     }
+
+   len = (int)(ind - buf);
+
+   /* Trim the buffer to the size actually used. Never overwrite buf with
+    * the realloc() result directly: on failure the original block is
+    * still valid and simply stays larger than needed. */
+   shrunk = realloc(buf, len + 1);
+   if (shrunk)
+      buf = shrunk;
+   buf[len] = '\0';
+
+   if (_len)
+      *_len = len;
+   return buf;
+}

diff --git a/libraries/eina/src/lib/eina_unicode.c b/libraries/eina/src/lib/eina_unicode.c new file mode 100644 index 0000000..342e3cb --- /dev/null +++ b/libraries/eina/src/lib/eina_unicode.c
@@ -0,0 +1,405 @@
	1	/* EINA - EFL data type library
	2	* Copyright (C) 2010 Tom Hacohen,
	3	* Brett Nash
	4	*
	5	* This library is free software; you can redistribute it and/or
	6	* modify it under the terms of the GNU Lesser General Public
	7	* License as published by the Free Software Foundation; either
	8	* version 2.1 of the License, or (at your option) any later version.
	9	*
	10	* This library is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* Lesser General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU Lesser General Public
	16	* License along with this library;
	17	* if not, see <http://www.gnu.org/licenses/>.
	18
	19	*/
	20
	21	#include <Eina.h>
	22	#include "eina_unicode.h"
	23
	24	/* FIXME: check if sizeof(wchar_t) == sizeof(Eina_Unicode) if so,
	25	* probably better to use the standard functions */
	26
	27	/* Maybe I'm too tired, but this is the only thing that actually worked. */
	28	const Eina_Unicode _EINA_UNICODE_EMPTY_STRING[1] = {0};
	29	EAPI const Eina_Unicode *EINA_UNICODE_EMPTY_STRING = _EINA_UNICODE_EMPTY_STRING;
	30	EAPI int
	31	eina_unicode_strcmp(const Eina_Unicode a, const Eina_Unicode b)
	32	{
	33	for (; a && a == *b; a++, b++)
	34	;
	35	if (a == b)
	36	return 0;
	37	else if (a < b)
	38	return -1;
	39	else
	40	return 1;
	41	}
	42
	43	EAPI Eina_Unicode *
	44	eina_unicode_strcpy(Eina_Unicode dest, const Eina_Unicode source)
	45	{
	46	Eina_Unicode *ret = dest;
	47
	48	while (*source)
	49	dest++ = source++;
	50	*dest = 0;
	51	return ret;
	52	}
	53
	54	EAPI Eina_Unicode *
	55	eina_unicode_strncpy(Eina_Unicode dest, const Eina_Unicode source, size_t n)
	56	{
	57	Eina_Unicode *ret = dest;
	58
	59	for ( ; n && *source ; n--)
	60	dest++ = source++;
	61	for (; n; n--)
	62	*dest++ = 0;
	63	return ret;
	64	}
	65
	66	EAPI size_t
	67	eina_unicode_strlen(const Eina_Unicode *ustr)
	68	{
	69	const Eina_Unicode *end;
	70	for (end = ustr; *end; end++)
	71	;
	72	return end - ustr;
	73	}
	74
	75	EAPI size_t
	76	eina_unicode_strnlen(const Eina_Unicode *ustr, int n)
	77	{
	78	const Eina_Unicode *end;
	79	const Eina_Unicode last = ustr + n; / technically not portable ;-) */
	80	for (end = ustr; end < last && *end; end++)
	81	;
	82	return end - ustr;
	83	}
	84
	85
	86
	87
	88	EAPI Eina_Unicode *
	89	eina_unicode_strndup(const Eina_Unicode *text, size_t n)
	90	{
	91	Eina_Unicode *ustr;
	92
	93	ustr = (Eina_Unicode ) malloc((n + 1) sizeof(Eina_Unicode));
	94	memcpy(ustr, text, n * sizeof(Eina_Unicode));
	95	ustr[n] = 0;
	96	return ustr;
	97	}
	98
	99	EAPI Eina_Unicode *
	100	eina_unicode_strdup(const Eina_Unicode *text)
	101	{
	102	size_t len;
	103
	104	len = eina_unicode_strlen(text);
	105	return eina_unicode_strndup(text, len);
	106	}
	107
	108	EAPI Eina_Unicode *
	109	eina_unicode_strstr(const Eina_Unicode haystack, const Eina_Unicode needle)
	110	{
	111	const Eina_Unicode i, j;
	112
	113	for (i = haystack; *i; i++)
	114	{
	115	haystack = i; /* set this location as the base position */
	116	for (j = needle; j && i && j == i; j++, i++)
	117	;
	118
	119	if (!j) /if we got to the end of j this means we got a full match */
	120	{
	121	return (Eina_Unicode )haystack; / return the new base position */
	122	}
	123	}
	124
	125	return NULL;
	126	}
	127
	128	EAPI Eina_Unicode *
	129	eina_unicode_escape(const Eina_Unicode *str)
	130	{
	131	Eina_Unicode s2, d;
	132	const Eina_Unicode *s;
	133
	134	s2 = malloc((eina_unicode_strlen(str) * 2) + 1);
	135	if (!s2)
	136	return NULL;
	137
	138	for (s = str, d = s2; *s != 0; s++, d++)
	139	{
	140	if ((s == ' ') \|\| (s == '\\') \|\| (*s == '\''))
	141	{
	142	*d = '\\';
	143	d++;
	144	}
	145
	146	d = s;
	147	}
	148	*d = 0;
	149	return s2;
	150	}
	151
	152	/* UTF-8 Handling */
	153
	154	#define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6
	155	/* The replacement range that will be used for bad utf8 chars. */
	156	#define ERROR_REPLACEMENT_BASE 0xDC80
	157	#define ERROR_REPLACEMENT_END 0xDCFF
	158	#define IS_INVALID_BYTE(x) ((x == 192) \|\| (x == 193) \|\| (x >= 245))
	159	#define IS_CONTINUATION_BYTE(x) ((x & 0xC0) == 0x80)
	160
	161	EAPI Eina_Unicode
	162	eina_unicode_utf8_get_next(const char buf, int iindex)
	163	{
	164	int ind = *iindex;
	165	Eina_Unicode r;
	166	unsigned char d;
	167
	168	/* if this char is the null terminator, exit */
	169	if ((d = buf[ind++]) == 0) return 0;
	170
	171	if ((d & 0x80) == 0)
	172	{ // 1 byte (7bit) - 0xxxxxxx
	173	*iindex = ind;
	174	return d;
	175	}
	176	if ((d & 0xe0) == 0xc0)
	177	{ // 2 byte (11bit) - 110xxxxx 10xxxxxx
	178	r = (d & 0x1f) << 6;
	179	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	180	!IS_CONTINUATION_BYTE(d)) goto error;
	181	r \|= (d & 0x3f);
	182	if (r <= 0x7F) goto error;
	183	*iindex = ind;
	184	return r;
	185	}
	186	if ((d & 0xf0) == 0xe0)
	187	{ // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
	188	r = (d & 0x0f) << 12;
	189	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	190	!IS_CONTINUATION_BYTE(d)) goto error;
	191	r \|= (d & 0x3f) << 6;
	192	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	193	!IS_CONTINUATION_BYTE(d)) goto error;
	194	r \|= (d & 0x3f);
	195	if (r <= 0x7FF) goto error;
	196	*iindex = ind;
	197	return r;
	198	}
	199	if ((d & 0xf8) == 0xf0)
	200	{ // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	201	r = (d & 0x07) << 18;
	202	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	203	!IS_CONTINUATION_BYTE(d)) goto error;
	204	r \|= (d & 0x3f) << 12;
	205	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	206	!IS_CONTINUATION_BYTE(d)) goto error;
	207	r \|= (d & 0x3f) << 6;
	208	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	209	!IS_CONTINUATION_BYTE(d)) goto error;
	210	r \|= (d & 0x3f);
	211	if (r <= 0xFFFF) goto error;
	212	*iindex = ind;
	213	return r;
	214	}
	215	if ((d & 0xfc) == 0xf8)
	216	{ // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	217	r = (d & 0x03) << 24;
	218	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	219	!IS_CONTINUATION_BYTE(d)) goto error;
	220	r \|= (d & 0x3f) << 18;
	221	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	222	!IS_CONTINUATION_BYTE(d)) goto error;
	223	r \|= (d & 0x3f) << 12;
	224	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	225	!IS_CONTINUATION_BYTE(d)) goto error;
	226	r \|= (d & 0x3f) << 6;
	227	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	228	!IS_CONTINUATION_BYTE(d)) goto error;
	229	r \|= (d & 0x3f);
	230	if (r <= 0x1FFFFF) goto error;
	231	*iindex = ind;
	232	return r;
	233	}
	234	if ((d & 0xfe) == 0xfc)
	235	{ // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	236	r = (d & 0x01) << 30;
	237	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	238	!IS_CONTINUATION_BYTE(d)) goto error;
	239	r \|= (d & 0x3f) << 24;
	240	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	241	!IS_CONTINUATION_BYTE(d)) goto error;
	242	r \|= (d & 0x3f) << 18;
	243	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	244	!IS_CONTINUATION_BYTE(d)) goto error;
	245	r \|= (d & 0x3f) << 12;
	246	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	247	!IS_CONTINUATION_BYTE(d)) goto error;
	248	r \|= (d & 0x3f) << 6;
	249	if (((d = buf[ind++]) == 0) \|\| IS_INVALID_BYTE(d) \|\|
	250	!IS_CONTINUATION_BYTE(d)) goto error;
	251	r \|= (d & 0x3f);
	252	if (r <= 0x3FFFFFF) goto error;
	253	*iindex = ind;
	254	return r;
	255	}
	256
	257	/* Gets here where there was an error and we want to replace the char
	258	* we just use the invalid unicode codepoints 8 lower bits represent
	259	* the original char */
	260	error:
	261	d = buf[*iindex];
	262	(*iindex)++;
	263	return ERROR_REPLACEMENT_BASE \| d;
	264	}
	265
	266	EAPI Eina_Unicode
	267	eina_unicode_utf8_get_prev(const char buf, int iindex)
	268	{
	269	int r;
	270	int ind = *iindex;
	271	/* First obtain the codepoint at iindex */
	272	r = eina_unicode_utf8_get_next(buf, &ind);
	273
	274	/* although when ind == 0 there's no previous char, we still want to get
	275	* the current char */
	276	if (*iindex <= 0)
	277	return r;
	278
	279	/* Next advance iindex to previous codepoint */
	280	ind = *iindex;
	281	ind--;
	282	while ((ind > 0) && ((buf[ind] & 0xc0) == 0x80))
	283	ind--;
	284
	285	*iindex = ind;
	286	return r;
	287	}
	288
	289	EAPI int
	290	eina_unicode_utf8_get_len(const char *buf)
	291	{
	292	/* returns the number of utf8 characters (not bytes) in the string */
	293	int i = 0, len = 0;
	294
	295	while (eina_unicode_utf8_get_next(buf, &i))
	296	len++;
	297
	298	return len;
	299	}
	300
	301	EAPI Eina_Unicode *
	302	eina_unicode_utf8_to_unicode(const char utf, int _len)
	303	{
	304	/* FIXME: Should optimize! */
	305	int len, i;
	306	int ind;
	307	Eina_Unicode buf, uind;
	308
	309	len = eina_unicode_utf8_get_len(utf);
	310	if (_len)
	311	*_len = len;
	312	buf = (Eina_Unicode *) calloc(sizeof(Eina_Unicode), (len + 1));
	313	if (!buf) return buf;
	314
	315	for (i = 0, ind = 0, uind = buf ; i < len ; i++, uind++)
	316	{
	317	*uind = eina_unicode_utf8_get_next(utf, &ind);
	318	}
	319
	320	return buf;
	321	}
	322
	323	EAPI char *
	324	eina_unicode_unicode_to_utf8(const Eina_Unicode uni, int _len)
	325	{
	326	char *buf;
	327	const Eina_Unicode *uind;
	328	char *ind;
	329	int ulen, len;
	330
	331	ulen = eina_unicode_strlen(uni);
	332	buf = (char *) calloc(ulen + 1, EINA_UNICODE_UTF8_BYTES_PER_CHAR);
	333
	334	len = 0;
	335	for (uind = uni, ind = buf ; *uind ; uind++)
	336	{
	337	if (uind <= 0x7F) / 1 byte char */
	338	{
	339	ind++ = uind;
	340	len += 1;
	341	}
	342	else if (uind <= 0x7FF) / 2 byte char */
	343	{
	344	ind++ = 0xC0 \| (unsigned char) (uind >> 6);
	345	ind++ = 0x80 \| (unsigned char) (uind & 0x3F);
	346	len += 2;
	347	}
	348	else if (uind <= 0xFFFF) / 3 byte char */
	349	{
	350	/* If it's a special replacement codepoint */
	351	if (*uind >= ERROR_REPLACEMENT_BASE &&
	352	*uind <= ERROR_REPLACEMENT_END)
	353	{
	354	ind++ = uind & 0xFF;
	355	len += 1;
	356	}
	357	else
	358	{
	359	ind++ = 0xE0 \| (unsigned char) (uind >> 12);
	360	ind++ = 0x80 \| (unsigned char) ((uind >> 6) & 0x3F);
	361	ind++ = 0x80 \| (unsigned char) (uind & 0x3F);
	362	len += 3;
	363	}
	364	}
	365	else if (uind <= 0x1FFFFF) / 4 byte char */
	366	{
	367	ind++ = 0xF0 \| (unsigned char) ((uind >> 18) & 0x07);
	368	ind++ = 0x80 \| (unsigned char) ((uind >> 12) & 0x3F);
	369	ind++ = 0x80 \| (unsigned char) ((uind >> 6) & 0x3F);
	370	ind++ = 0x80 \| (unsigned char) (uind & 0x3F);
	371	len += 4;
	372	}
	373	else if (uind <= 0x3FFFFFF) / 5 byte char */
	374	{
	375	ind++ = 0xF8 \| (unsigned char) ((uind >> 24) & 0x03);
	376	ind++ = 0x80 \| (unsigned char) ((uind >> 18) & 0x3F);
	377	ind++ = 0x80 \| (unsigned char) ((uind >> 12) & 0x3F);
	378	ind++ = 0x80 \| (unsigned char) ((uind >> 6) & 0x3F);
	379	ind++ = 0x80 \| (unsigned char) (uind & 0x3F);
	380	len += 5;
	381	}
	382	else if (uind <= 0x7FFFFFFF) / 6 byte char */
	383	{
	384	ind++ = 0xFC \| (unsigned char) ((uind >> 30) & 0x01);
	385	ind++ = 0x80 \| (unsigned char) ((uind >> 24) & 0x3F);
	386	ind++ = 0x80 \| (unsigned char) ((uind >> 18) & 0x3F);
	387	ind++ = 0x80 \| (unsigned char) ((uind >> 12) & 0x3F);
	388	ind++ = 0x80 \| (unsigned char) ((uind >> 6) & 0x3F);
	389	ind++ = 0x80 \| (unsigned char) (uind & 0x3F);
	390	len += 6;
	391	}
	392	else /* error */
	393	{
	394	/* Do something */
	395	}
	396	}
	397	buf = realloc(buf, len + 1);
	398	buf[len] = '\0';
	399	if (_len)
	400	*_len = len;
	401	return buf;
	402	}
	403
	404
	405