1 files changed, 435 insertions, 0 deletions
diff --git a/libraries/evas/src/static_deps/liblinebreak/wordbreak.c b/libraries/evas/src/static_deps/liblinebreak/wordbreak.c
new file mode 100644
index 0000000..bbbb7f4
--- /dev/null
+++ b/libraries/evas/src/static_deps/liblinebreak/wordbreak.c
@@ -0,0 +1,435 @@
+/* vim: set tabstop=4 shiftwidth=4: */
+/*
+ * Word breaking in a Unicode sequence.  Designed to be used in a
+ * generic text renderer.
+ *
+ * Copyright (C) 2011-2011 Tom Hacohen <tom@stosb.com>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ *    not claim that you wrote the original software.  If you use this
+ *    software in a product, an acknowledgement in the product
+ *    documentation would be appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must
+ *    not be misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source
+ *    distribution.
+ *
+ * The main reference is Unicode Standard Annex 29 (UAX #29):
+ *              <URL:http://unicode.org/reports/tr29>
+ *
+ * When this library was designed, this annex was at Revision 17, for
+ * Unicode 6.0.0:
+ *              <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
+ *
+ * The Unicode Terms of Use are available at
+ *              <URL:http://www.unicode.org/copyright.html>
+ */
+/**
+ * @file        wordbreak.c
+ *
+ * Implementation of the word breaking algorithm as described in Unicode
+ * Standard Annex 29.
+ *
+ * @version     2.0, 2011/12/12
+ * @author      Tom Hacohen
+ */
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include "linebreak.h"
+#include "linebreakdef.h"
+#include "wordbreak.h"
+#include "wordbreakdata.x"
+/* Number of elements in a true array; not meaningful for a pointer. */
+#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
+/**
+ * Initializes the word breaking internals.
+ *
+ * At present there is no state to prepare, so the body is empty; the
+ * function exists so that callers have a hook should setup be needed later.
+ */
+void init_wordbreak(void)
+{
+        /* Nothing to set up yet. */
+}
+/**
+ * Gets the word breaking class of a character.
+ *
+ * Binary-searches \a wbp, comparing \a ch against the inclusive
+ * [start, end] range of each entry.
+ *
+ * @param ch    character to check
+ * @param wbp   pointer to the wbp breaking properties array; the search
+ *              assumes the entries are ordered by ascending,
+ *              non-overlapping ranges
+ * @param len   the size of the wbp array in number of items.
+ * @return              the word breaking class if found; \c WBP_Any otherwise
+ *                      (this includes an empty array, which is never indexed)
+ */
+static enum WordBreakClass get_char_wb_class(
+                utf32_t ch,
+                struct WordBreakProperties *wbp,
+                size_t len)
+{
+        /* Half-open window [lo, hi).  Keeping the bounds in size_t avoids
+         * narrowing len to int, and testing before the first probe means a
+         * zero-length table is not read. */
+        size_t lo = 0;
+        size_t hi = len;
+
+        while (lo < hi)
+        {
+                size_t mid = lo + (hi - lo) / 2;
+
+                if (ch < wbp[mid].start)
+                        hi = mid;
+                else if (ch > wbp[mid].end)
+                        lo = mid + 1;
+                else
+                        return wbp[mid].prop;
+        }
+        return WBP_Any;
+}
+/**
+ * Writes the break types for the characters located in [posStart, posEnd).
+ *
+ * Each character in the range is stepped over with get_next_char.  Every
+ * position of a character except its last one is set to
+ * #WORDBREAK_INSIDECHAR; the last position receives brkType, unless that
+ * cell already holds #WORDBREAK_NOBREAK, in which case it is left alone.
+ * brks is therefore expected to be initialized before the call.
+ *
+ * @param s                             the string
+ * @param brks[out]             the breaks array to fill.
+ * @param posStart              the start position
+ * @param posEnd                the end position (exclusive)
+ * @param len                   the length of the string
+ * @param brkType               the breaks type to use
+ * @param get_next_char function to get the next UTF-32 character
+ */
+static void set_brks_to(const void *s,
+                char *brks,
+                size_t posStart,
+                size_t posEnd,
+                size_t len,
+                char brkType,
+                get_next_char_t get_next_char)
+{
+        size_t next = posStart;
+
+        while (next < posEnd)
+        {
+                size_t last;
+
+                /* The decoded value is discarded; the call is made only for
+                 * the position it stores back into next. */
+                get_next_char(s, len, &next);
+                last = next - 1;
+
+                while (posStart < last)
+                {
+                        brks[posStart] = WORDBREAK_INSIDECHAR;
+                        ++posStart;
+                }
+                assert(posStart == last);
+
+                /* A cell already marked "no break" keeps that mark. */
+                if (brks[posStart] != WORDBREAK_NOBREAK)
+                        brks[posStart] = brkType;
+                posStart = next;
+        }
+}
+/* Checks to see if newline, cr, or lf. for WB3a and b.
+ * The argument is parenthesized so that compound expressions (e.g. a
+ * conditional) are compared as a whole; note it is evaluated up to three
+ * times. */
+#define IS_WB3ab(cls) (((cls) == WBP_Newline) || ((cls) == WBP_CR) || \
+                ((cls) == WBP_LF))
+/**
+ * Sets the word breaking information for a generic input string.
+ *
+ * Reads the string one character at a time, classifies each character with
+ * get_char_wb_class() and applies the UAX #29 word boundary rules named in
+ * the inline comments.  Decisions are deferred: the positions from posLast
+ * up to the current character stay pending until the current character
+ * shows whether that run is joined (#WORDBREAK_NOBREAK) or split
+ * (#WORDBREAK_BREAK); the run is then written by one set_brks_to() call.
+ *
+ * @param[in]  s                        input string
+ * @param[in]  len                      length of the input
+ * @param[in]  lang                     language of the input (currently unused)
+ * @param[out] brks                     pointer to the output breaking data, containing
+ *                                                      #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
+ *                                                      #WORDBREAK_INSIDECHAR; len cells are
+ *                                                      initialized and filled
+ * @param[in] get_next_char     function to get the next UTF-32 character
+ */
+static void set_wordbreaks(
+                const void *s,
+                size_t len,
+                const char *lang,
+                char *brks,
+                get_next_char_t get_next_char)
+{
+        /* Previous class (Extend/Format characters inherit the class that
+         * preceded them, see WB4 below). */
+        enum WordBreakClass p_cls = WBP_Undefined;
+        /* Strong previous class: only updated when a case resolves the
+         * pending run, so "go on" Mid* characters leave it untouched. */
+        enum WordBreakClass sp_cls = WBP_Undefined;
+        utf32_t ch;
+        /* Read position handed to get_next_char. */
+        size_t posCur = 0;
+        /* Start position of the character currently being classified. */
+        size_t posCurSt = 0;
+        /* Start of the run whose break types are still undecided. */
+        size_t posLast = 0;
+
+        /* FIXME: unused atm. */
+        (void) lang;
+
+        /* Init brks */
+        memset(brks, WORDBREAK_BREAK, len);
+
+        ch = get_next_char(s, len, &posCur);
+
+        /* WB3a, WB3b are implied. */
+        while (ch != EOS)
+        {
+                /* Current class */
+                enum WordBreakClass c_cls;
+                c_cls = get_char_wb_class(ch, wb_prop_default,
+                                ARRAY_LEN(wb_prop_default));
+
+                switch (c_cls)
+                {
+                case WBP_CR:
+                        set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                        get_next_char);
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_LF:
+                        /* WB3: never break between CR and LF.  Any other LF is
+                         * handled like a newline (WB3a, WB3b): the pending run
+                         * must still be flushed so that its multi-position
+                         * characters get their #WORDBREAK_INSIDECHAR marks. */
+                        set_brks_to(s, brks, posLast, posCurSt, len,
+                                        (sp_cls == WBP_CR) ? WORDBREAK_NOBREAK
+                                                           : WORDBREAK_BREAK,
+                                        get_next_char);
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_Newline:
+                        /* WB3a, WB3b */
+                        set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                        get_next_char);
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_Extend:
+                case WBP_Format:
+                        /* WB4 - If not the first char/after a newline (W3ab),
+                         * skip this class, set it to be the same as the prev, and mark
+                         * brks not to break before them. */
+                        if ((sp_cls == WBP_Undefined) || IS_WB3ab(sp_cls))
+                        {
+                                /* NOTE(review): posLast is not advanced here, so
+                                 * the same run is visited again by the next
+                                 * set_brks_to() call. */
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                                sp_cls = c_cls;
+                        }
+                        else
+                        {
+                                /* It's surely not the first */
+                                brks[posCurSt - 1] = WORDBREAK_NOBREAK;
+                                /* "inherit" the previous class. */
+                                c_cls = p_cls;
+                        }
+                        break;
+
+                case WBP_Katakana:
+                        if ((sp_cls == WBP_Katakana) || /* WB13 */
+                                        (sp_cls == WBP_ExtendNumLet)) /* WB13b */
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
+                                                get_next_char);
+                        }
+                        /* No rule found, reset */
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                        }
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_ALetter:
+                        if ((sp_cls == WBP_ALetter) || /* WB5,6,7 */
+                                        ((sp_cls == WBP_Numeric) && (p_cls == WBP_Numeric)) || /* WB10 */
+                                        (sp_cls == WBP_ExtendNumLet)) /* WB13b */
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
+                                                get_next_char);
+                        }
+                        /* No rule found, reset */
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                        }
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_MidNumLet:
+                        if ((p_cls == WBP_ALetter) || /* WBP6,7 */
+                                        (p_cls == WBP_Numeric)) /* WBP11,12 */
+                        {
+                                /* Go on: the decision is left to the next character. */
+                        }
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                                sp_cls = c_cls;
+                                posLast = posCurSt;
+                        }
+                        break;
+
+                case WBP_MidLetter:
+                        if (p_cls == WBP_ALetter) /* WBP6,7 */
+                        {
+                                /* Go on: the decision is left to the next character. */
+                        }
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                                sp_cls = c_cls;
+                                posLast = posCurSt;
+                        }
+                        break;
+
+                case WBP_MidNum:
+                        if (p_cls == WBP_Numeric) /* WBP11,12 */
+                        {
+                                /* Go on: the decision is left to the next character. */
+                        }
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                                sp_cls = c_cls;
+                                posLast = posCurSt;
+                        }
+                        break;
+
+                case WBP_Numeric:
+                        if ((sp_cls == WBP_Numeric) || /* WB8,11,12 */
+                                        ((sp_cls == WBP_ALetter) && (p_cls == WBP_ALetter)) || /* WB9 */
+                                        (sp_cls == WBP_ExtendNumLet)) /* WB13b */
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
+                                                get_next_char);
+                        }
+                        /* No rule found, reset */
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                        }
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_ExtendNumLet:
+                        /* WB13a,13b */
+                        if ((sp_cls == p_cls) &&
+                                ((p_cls == WBP_ALetter) ||
+                                 (p_cls == WBP_Numeric) ||
+                                 (p_cls == WBP_Katakana) ||
+                                 (p_cls == WBP_ExtendNumLet)))
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
+                                                get_next_char);
+                        }
+                        /* No rule found, reset */
+                        else
+                        {
+                                set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                                get_next_char);
+                        }
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                case WBP_Any:
+                        /* Allow breaks and reset */
+                        set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
+                                        get_next_char);
+                        sp_cls = c_cls;
+                        posLast = posCurSt;
+                        break;
+
+                default:
+                        /* Error, should never get here! */
+                        assert(0);
+                        break;
+                }
+
+                p_cls = c_cls;
+                posCurSt = posCur;
+                ch = get_next_char(s, len, &posCur);
+        }
+
+        /* WB2: whatever is still pending at the end of the text breaks. */
+        set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK,
+                        get_next_char);
+}
+/**
+ * Sets the word breaking information for a UTF-8 input string.
+ *
+ * Thin front end: forwards every argument to set_wordbreaks(), using
+ * lb_get_next_char_utf8 as the character reader.
+ *
+ * @param[in]  s        input UTF-8 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
+ *                                      #WORDBREAK_INSIDECHAR
+ */
+void set_wordbreaks_utf8(
+                const utf8_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        get_next_char_t reader = (get_next_char_t)lb_get_next_char_utf8;
+
+        set_wordbreaks(s, len, lang, brks, reader);
+}
+/**
+ * Sets the word breaking information for a UTF-16 input string.
+ *
+ * Thin front end: forwards every argument to set_wordbreaks(), using
+ * lb_get_next_char_utf16 as the character reader.
+ *
+ * @param[in]  s        input UTF-16 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
+ *                                      #WORDBREAK_INSIDECHAR
+ */
+void set_wordbreaks_utf16(
+                const utf16_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        get_next_char_t reader = (get_next_char_t)lb_get_next_char_utf16;
+
+        set_wordbreaks(s, len, lang, brks, reader);
+}
+/**
+ * Sets the word breaking information for a UTF-32 input string.
+ *
+ * Thin front end: forwards every argument to set_wordbreaks(), using
+ * lb_get_next_char_utf32 as the character reader.
+ *
+ * @param[in]  s        input UTF-32 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
+ *                                      #WORDBREAK_INSIDECHAR
+ */
+void set_wordbreaks_utf32(
+                const utf32_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        get_next_char_t reader = (get_next_char_t)lb_get_next_char_utf32;
+
+        set_wordbreaks(s, len, lang, brks, reader);
+}

diff --git a/libraries/evas/src/static_deps/liblinebreak/wordbreak.c b/libraries/evas/src/static_deps/liblinebreak/wordbreak.c new file mode 100644 index 0000000..bbbb7f4 --- /dev/null +++ b/libraries/evas/src/static_deps/liblinebreak/wordbreak.c
@@ -0,0 +1,435 @@
	1	/* vim: set tabstop=4 shiftwidth=4: */
	2
	3	/*
	4	* Word breaking in a Unicode sequence. Designed to be used in a
	5	* generic text renderer.
	6	*
	7	* Copyright (C) 2011-2011 Tom Hacohen <tom@stosb.com>
	8	*
	9	* This software is provided 'as-is', without any express or implied
	10	* warranty. In no event will the author be held liable for any damages
	11	* arising from the use of this software.
	12	*
	13	* Permission is granted to anyone to use this software for any purpose,
	14	* including commercial applications, and to alter it and redistribute
	15	* it freely, subject to the following restrictions:
	16	*
	17	* 1. The origin of this software must not be misrepresented; you must
	18	* not claim that you wrote the original software. If you use this
	19	* software in a product, an acknowledgement in the product
	20	* documentation would be appreciated but is not required.
	21	* 2. Altered source versions must be plainly marked as such, and must
	22	* not be misrepresented as being the original software.
	23	* 3. This notice may not be removed or altered from any source
	24	* distribution.
	25	*
	26	* The main reference is Unicode Standard Annex 29 (UAX #29):
	27	* <URL:http://unicode.org/reports/tr29>
	28	*
	29	* When this library was designed, this annex was at Revision 17, for
	30	* Unicode 6.0.0:
	31	* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
	32	*
	33	* The Unicode Terms of Use are available at
	34	* <URL:http://www.unicode.org/copyright.html>
	35	*/
	36
	37	/**
	38	* @file wordbreak.c
	39	*
	40	* Implementation of the word breaking algorithm as described in Unicode
	41	* Standard Annex 29.
	42	*
	43	* @version 2.0, 2011/12/12
	44	* @author Tom Hacohen
	45	*/
	46
	47
	48	#include <assert.h>
	49	#include <stddef.h>
	50	#include <string.h>
	51	#include "linebreak.h"
	52	#include "linebreakdef.h"
	53
	54	#include "wordbreak.h"
	55	#include "wordbreakdata.x"
	56
	57	#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
	58
	59	/* Init the wordbreak internals. */
	60	void init_wordbreak(void)
	61	{
	62	/* Currently does nothing, may be needed in the future. */
	63	return;
	64	}
	65
	66	/**
	67	* Gets the word breaking class of a character.
	68	*
	69	* @param ch character to check
	70	* @param wbp pointer to the wbp breaking properties array
	71	* @param len the size of the wbp array in number of items.
	72	* @return the word breaking class if found; \c WBP_Any otherwise
	73	*/
	74	static enum WordBreakClass get_char_wb_class(
	75	utf32_t ch,
	76	struct WordBreakProperties *wbp,
	77	size_t len)
	78	{
	79	int min = 0;
	80	int max = len - 1;
	81	int mid;
	82
	83	do
	84	{
	85	mid = (min + max) / 2;
	86
	87	if (ch < wbp[mid].start)
	88	max = mid - 1;
	89	else if (ch > wbp[mid].end)
	90	min = mid + 1;
	91	else
	92	return wbp[mid].prop;
	93	}
	94	while (min <= max);
	95
	96	return WBP_Any;
	97	}
	98
	99	/**
	100	* Sets the break types in brks starting from posLast up to posStop.
	101	*
	102	* It sets the inside chars to #WORDBREAK_INSIDECHAR and the rest to brkType.
	103	* Assumes brks is initialized - all the cells with #WORDBREAK_NOBREAK are
	104	* cells that we really don't want to break after.
	105	*
	106	* @param s the string
	107	* @param brks[out] the breaks array to fill.
	108	* @param posStart the start position
	109	* @param posEnd the end position
	110	* @param len the length of the string
	111	* @param brkType the breaks type to use
	112	* @param get_next_char function to get the next UTF-32 character
	113	*/
	114	static void set_brks_to(const void *s,
	115	char *brks,
	116	size_t posStart,
	117	size_t posEnd,
	118	size_t len,
	119	char brkType,
	120	get_next_char_t get_next_char)
	121	{
	122	size_t posCur = posStart;
	123	while (posCur < posEnd)
	124	{
	125	get_next_char(s, len, &posCur);
	126	for ( ; posStart < posCur - 1; ++posStart)
	127	{
	128	brks[posStart] = WORDBREAK_INSIDECHAR;
	129	}
	130	assert(posStart == posCur - 1);
	131
	132	/* Only set it if we haven't set it not to break before. */
	133	if (brks[posStart] != WORDBREAK_NOBREAK)
	134	brks[posStart] = brkType;
	135	posStart = posCur;
	136	}
	137	}
	138
	139	/* Checks to see if newline, cr, or lf. for WB3a and b */
	140	#define IS_WB3ab(cls) ((cls == WBP_Newline) \|\| (cls == WBP_CR) \|\| \
	141	(cls == WBP_LF))
	142
	143	/**
	144	* Sets the word breaking information for a generic input string.
	145	*
	146	* @param[in] s input string
	147	* @param[in] len length of the input
	148	* @param[in] lang language of the input
	149	* @param[out] brks pointer to the output breaking data, containing
	150	* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
	151	* #WORDBREAK_INSIDEACHAR
	152	* @param[in] get_next_char function to get the next UTF-32 character
	153	*/
	154	static void set_wordbreaks(
	155	const void *s,
	156	size_t len,
	157	const char *lang,
	158	char *brks,
	159	get_next_char_t get_next_char)
	160	{
	161	/* Previous class */
	162	enum WordBreakClass p_cls = WBP_Undefined;
	163	/* Strong previous class. */
	164	enum WordBreakClass sp_cls = WBP_Undefined;
	165	utf32_t ch;
	166	size_t posCur = 0;
	167	size_t posCurSt = 0;
	168	size_t posLast = 0;
	169
	170	/* FIXME: unused atm. */
	171	(void) lang;
	172
	173
	174	/* Init brks */
	175	memset(brks, WORDBREAK_BREAK, len);
	176
	177	ch = get_next_char(s, len, &posCur);
	178
	179	/* WB3a, WB3b are implied. */
	180	for ( ; ch != EOS ; )
	181	{
	182	/* Current class */
	183	enum WordBreakClass c_cls;
	184	c_cls = get_char_wb_class(ch, wb_prop_default,
	185	ARRAY_LEN(wb_prop_default));
	186
	187	switch (c_cls)
	188	{
	189	case WBP_CR:
	190	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	191	get_next_char);
	192	sp_cls = c_cls;
	193	posLast = posCurSt;
	194	break;
	195
	196	case WBP_LF:
	197	if (sp_cls == WBP_CR) /* WB3 */
	198	{
	199	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
	200	get_next_char);
	201	sp_cls = c_cls;
	202	posLast = posCurSt;
	203	}
	204	sp_cls = c_cls;
	205	posLast = posCurSt;
	206	break;
	207
	208	case WBP_Newline:
	209	/* WB3a, WB3b */
	210	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	211	get_next_char);
	212	sp_cls = c_cls;
	213	posLast = posCurSt;
	214	break;
	215
	216	case WBP_Extend:
	217	case WBP_Format:
	218	/* WB4 - If not the first char/after a newline (W3ab),
	219	* skip this class, set it to be the same as the prev, and mark
	220	* brks not to break before them. */
	221	if ((sp_cls == WBP_Undefined) \|\| IS_WB3ab(sp_cls))
	222	{
	223	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	224	get_next_char);
	225	sp_cls = c_cls;
	226	}
	227	else
	228	{
	229	/* It's surely not the first */
	230	brks[posCurSt - 1] = WORDBREAK_NOBREAK;
	231	/* "inherit" the previous class. */
	232	c_cls = p_cls;
	233	}
	234	break;
	235
	236	case WBP_Katakana:
	237	if ((sp_cls == WBP_Katakana) \|\| /* WB13 */
	238	(sp_cls == WBP_ExtendNumLet)) /* WB13b */
	239	{
	240	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
	241	get_next_char);
	242	}
	243	/* No rule found, reset */
	244	else
	245	{
	246	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	247	get_next_char);
	248	}
	249	sp_cls = c_cls;
	250	posLast = posCurSt;
	251	break;
	252
	253	case WBP_ALetter:
	254	if ((sp_cls == WBP_ALetter) \|\| /* WB5,6,7 */
	255	((sp_cls == WBP_Numeric) && (p_cls == WBP_Numeric)) \|\| /* WB10 */
	256	(sp_cls == WBP_ExtendNumLet)) /* WB13b */
	257	{
	258	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
	259	get_next_char);
	260	}
	261	/* No rule found, reset */
	262	else
	263	{
	264	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	265	get_next_char);
	266	}
	267	sp_cls = c_cls;
	268	posLast = posCurSt;
	269	break;
	270
	271	case WBP_MidNumLet:
	272	if ((p_cls == WBP_ALetter) \|\| /* WBP6,7 */
	273	(p_cls == WBP_Numeric)) /* WBP11,12 */
	274	{
	275	/* Go on */
	276	}
	277	else
	278	{
	279	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	280	get_next_char);
	281	sp_cls = c_cls;
	282	posLast = posCurSt;
	283	}
	284	break;
	285
	286	case WBP_MidLetter:
	287	if (p_cls == WBP_ALetter) /* WBP6,7 */
	288	{
	289	/* Go on */
	290	}
	291	else
	292	{
	293	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	294	get_next_char);
	295	sp_cls = c_cls;
	296	posLast = posCurSt;
	297	}
	298	break;
	299
	300	case WBP_MidNum:
	301	if (p_cls == WBP_Numeric) /* WBP11,12 */
	302	{
	303	/* Go on */
	304	}
	305	else
	306	{
	307	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	308	get_next_char);
	309	sp_cls = c_cls;
	310	posLast = posCurSt;
	311	}
	312	break;
	313
	314	case WBP_Numeric:
	315	if ((sp_cls == WBP_Numeric) \|\| /* WB8,11,12 */
	316	((sp_cls == WBP_ALetter) && (p_cls == WBP_ALetter)) \|\| /* WB9 */
	317	(sp_cls == WBP_ExtendNumLet)) /* WB13b */
	318	{
	319	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
	320	get_next_char);
	321	}
	322	/* No rule found, reset */
	323	else
	324	{
	325	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	326	get_next_char);
	327	}
	328	sp_cls = c_cls;
	329	posLast = posCurSt;
	330	break;
	331
	332	case WBP_ExtendNumLet:
	333	/* WB13a,13b */
	334	if ((sp_cls == p_cls) &&
	335	((p_cls == WBP_ALetter) \|\|
	336	(p_cls == WBP_Numeric) \|\|
	337	(p_cls == WBP_Katakana) \|\|
	338	(p_cls == WBP_ExtendNumLet)))
	339	{
	340	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
	341	get_next_char);
	342	}
	343	/* No rule found, reset */
	344	else
	345	{
	346	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	347	get_next_char);
	348	}
	349	sp_cls = c_cls;
	350	posLast = posCurSt;
	351	break;
	352
	353	case WBP_Any:
	354	/* Allow breaks and reset */
	355	set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
	356	get_next_char);
	357	sp_cls = c_cls;
	358	posLast = posCurSt;
	359	break;
	360
	361	default:
	362	/* Error, should never get here! */
	363	assert(0);
	364	break;
	365	}
	366
	367	p_cls = c_cls;
	368	posCurSt = posCur;
	369	ch = get_next_char(s, len, &posCur);
	370	}
	371
	372	/* WB2 */
	373	set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK,
	374	get_next_char);
	375	}
	376
	377	/**
	378	* Sets the word breaking information for a UTF-8 input string.
	379	*
	380	* @param[in] s input UTF-8 string
	381	* @param[in] len length of the input
	382	* @param[in] lang language of the input
	383	* @param[out] brks pointer to the output breaking data, containing
	384	* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
	385	* #WORDBREAK_INSIDEACHAR
	386	*/
	387	void set_wordbreaks_utf8(
	388	const utf8_t *s,
	389	size_t len,
	390	const char *lang,
	391	char *brks)
	392	{
	393	set_wordbreaks(s, len, lang, brks,
	394	(get_next_char_t)lb_get_next_char_utf8);
	395	}
	396
	397	/**
	398	* Sets the word breaking information for a UTF-16 input string.
	399	*
	400	* @param[in] s input UTF-16 string
	401	* @param[in] len length of the input
	402	* @param[in] lang language of the input
	403	* @param[out] brks pointer to the output breaking data, containing
	404	* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
	405	* #WORDBREAK_INSIDEACHAR
	406	*/
	407	void set_wordbreaks_utf16(
	408	const utf16_t *s,
	409	size_t len,
	410	const char *lang,
	411	char *brks)
	412	{
	413	set_wordbreaks(s, len, lang, brks,
	414	(get_next_char_t)lb_get_next_char_utf16);
	415	}
	416
	417	/**
	418	* Sets the word breaking information for a UTF-32 input string.
	419	*
	420	* @param[in] s input UTF-32 string
	421	* @param[in] len length of the input
	422	* @param[in] lang language of the input
	423	* @param[out] brks pointer to the output breaking data, containing
	424	* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
	425	* #WORDBREAK_INSIDEACHAR
	426	*/
	427	void set_wordbreaks_utf32(
	428	const utf32_t *s,
	429	size_t len,
	430	const char *lang,
	431	char *brks)
	432	{
	433	set_wordbreaks(s, len, lang, brks,
	434	(get_next_char_t)lb_get_next_char_utf32);
	435	}