1 files changed, 737 insertions, 0 deletions
diff --git a/libraries/evas/src/static_deps/liblinebreak/linebreak.c b/libraries/evas/src/static_deps/liblinebreak/linebreak.c
new file mode 100644
index 0000000..f9ff9a1
--- /dev/null
+++ b/libraries/evas/src/static_deps/liblinebreak/linebreak.c
@@ -0,0 +1,737 @@
+/* vim: set tabstop=4 shiftwidth=4: */
+/*
+ * Line breaking in a Unicode sequence.  Designed to be used in a
+ * generic text renderer.
+ *
+ * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ *    not claim that you wrote the original software.  If you use this
+ *    software in a product, an acknowledgement in the product
+ *    documentation would be appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must
+ *    not be misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source
+ *    distribution.
+ *
+ * The main reference is Unicode Standard Annex 14 (UAX #14):
+ *              <URL:http://www.unicode.org/reports/tr14/>
+ *
+ * When this library was designed, this annex was at Revision 19, for
+ * Unicode 5.0.0:
+ *              <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ *
+ * This library has been updated according to Revision 24, for
+ * Unicode 5.2.0:
+ *              <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ *
+ * The Unicode Terms of Use are available at
+ *              <URL:http://www.unicode.org/copyright.html>
+ */
+/**
+ * @file        linebreak.c
+ *
+ * Implementation of the line breaking algorithm as described in Unicode
+ * Standard Annex 14.
+ *
+ * @version     2.0, 2010/01/03
+ * @author      Wu Yongwei
+ */
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include "linebreak.h"
+#include "linebreakdef.h"
+/**
+ * Size of the second-level index to the line breaking properties.
+ */
+#define LINEBREAK_INDEX_SIZE 40
+/**
+ * Version number of the library.
+ */
+const int linebreak_version = LINEBREAK_VERSION;
+/**
+ * Enumeration of break actions.  They are used in the break action
+ * pair table below.
+ */
+enum BreakAction
+{
+        DIR_BRK,                /**< Direct break opportunity */
+        IND_BRK,                /**< Indirect break opportunity */
+        CMI_BRK,                /**< Indirect break opportunity for combining marks */
+        CMP_BRK,                /**< Prohibited break for combining marks */
+        PRH_BRK                 /**< Prohibited break */
+};
+/**
+ * Break action pair table.  This is a direct mapping of Table 2 of
+ * Unicode Standard Annex 14, Revision 24.
+ */
+static enum BreakAction baTable[LBP_JT][LBP_JT] = {
+        {       /* OP */
+                PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
+                PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
+        {       /* CL */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* CP */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* QU */
+                PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+                IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
+        {       /* GL */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+                IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
+        {       /* NS */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* EX */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* SY */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* IS */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* PR */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
+        {       /* PO */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* NU */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* AL */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* ID */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* IN */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* HY */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* BA */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* BB */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+                IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
+        {       /* B2 */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* ZW */
+                DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
+                DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* CM */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
+        {       /* WJ */
+                IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+                IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
+        {       /* H2 */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
+        {       /* H3 */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
+        {       /* JL */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
+        {       /* JV */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
+        {       /* JT */
+                DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+                PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+                IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
+                PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
+};
+/**
+ * Struct for the second-level index to the line breaking properties.
+ */
+struct LineBreakPropertiesIndex
+{
+        utf32_t end;                                    /**< End coding point */
+        struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
+};
+/**
+ * Second-level index to the line breaking properties.
+ */
+static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
+{
+        { 0xFFFFFFFF, lb_prop_default }
+};
+/**
+ * Initializes the second-level index to the line breaking properties.
+ * If it is not called, the performance of #get_char_lb_class_lang (and
+ * thus the main functionality) can be pretty bad, especially for big
+ * code points like those of Chinese.
+ */
+void init_linebreak(void)
+{
+        size_t i;
+        size_t iPropDefault;
+        size_t len;
+        size_t step;
+        len = 0;
+        while (lb_prop_default[len].prop != LBP_Undefined)
+                ++len;
+        step = len / LINEBREAK_INDEX_SIZE;
+        iPropDefault = 0;
+        for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
+        {
+                lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
+                iPropDefault += step;
+                lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
+        }
+        lb_prop_index[--i].end = 0xFFFFFFFF;
+}
+/**
+ * Gets the language-specific line breaking properties.
+ *
+ * @param lang  language of the text
+ * @return              pointer to the language-specific line breaking
+ *                              properties array if found; \c NULL otherwise
+ */
+static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
+{
+        struct LineBreakPropertiesLang *lbplIter;
+        if (lang != NULL)
+        {
+                for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
+                {
+                        if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
+                        {
+                                return lbplIter->lbp;
+                        }
+                }
+        }
+        return NULL;
+}
+/**
+ * Gets the line breaking class of a character from a line breaking
+ * properties array.
+ *
+ * @param ch    character to check
+ * @param lbp   pointer to the line breaking properties array
+ * @return              the line breaking class if found; \c LBP_XX otherwise
+ */
+static enum LineBreakClass get_char_lb_class(
+                utf32_t ch,
+                struct LineBreakProperties *lbp)
+{
+        while (lbp->prop != LBP_Undefined && ch >= lbp->start)
+        {
+                if (ch <= lbp->end)
+                        return lbp->prop;
+                ++lbp;
+        }
+        return LBP_XX;
+}
+/**
+ * Gets the line breaking class of a character from the default line
+ * breaking properties array.
+ *
+ * @param ch    character to check
+ * @return              the line breaking class if found; \c LBP_XX otherwise
+ */
+static enum LineBreakClass get_char_lb_class_default(
+                utf32_t ch)
+{
+        size_t i = 0;
+        while (ch > lb_prop_index[i].end)
+                ++i;
+        assert(i < LINEBREAK_INDEX_SIZE);
+        return get_char_lb_class(ch, lb_prop_index[i].lbp);
+}
+/**
+ * Gets the line breaking class of a character for a specific
+ * language.  This function will check the language-specific data first,
+ * and then the default data if there is no language-specific property
+ * available for the character.
+ *
+ * @param ch            character to check
+ * @param lbpLang       pointer to the language-specific line breaking
+ *                                      properties array
+ * @return                      the line breaking class if found; \c LBP_XX
+ *                                      otherwise
+ */
+static enum LineBreakClass get_char_lb_class_lang(
+                utf32_t ch,
+                struct LineBreakProperties *lbpLang)
+{
+        enum LineBreakClass lbcResult;
+        /* Find the language-specific line breaking class for a character */
+        if (lbpLang)
+        {
+                lbcResult = get_char_lb_class(ch, lbpLang);
+                if (lbcResult != LBP_XX)
+                        return lbcResult;
+        }
+        /* Find the generic language-specific line breaking class, if no
+         * language context is provided, or language-specific data are not
+         * available for the specific character in the specified language */
+        return get_char_lb_class_default(ch);
+}
+/**
+ * Resolves the line breaking class for certain ambiguous or complicated
+ * characters.  They are treated in a simplistic way in this
+ * implementation.
+ *
+ * @param lbc   line breaking class to resolve
+ * @param lang  language of the text
+ * @return              the resolved line breaking class
+ */
+static enum LineBreakClass resolve_lb_class(
+                enum LineBreakClass lbc,
+                const char *lang)
+{
+        switch (lbc)
+        {
+        case LBP_AI:
+                if (lang != NULL &&
+                                (strncmp(lang, "zh", 2) == 0 || /* Chinese */
+                                 strncmp(lang, "ja", 2) == 0 || /* Japanese */
+                                 strncmp(lang, "ko", 2) == 0))  /* Korean */
+                {
+                        return LBP_ID;
+                }
+                /* Fall through */
+        case LBP_SA:
+        case LBP_SG:
+        case LBP_XX:
+                return LBP_AL;
+        default:
+                return lbc;
+        }
+}
+/**
+ * Gets the next Unicode character in a UTF-8 sequence.  The index will
+ * be advanced to the next complete character, unless the end of string
+ * is reached in the middle of a UTF-8 sequence.
+ *
+ * @param[in]     s             input UTF-8 string
+ * @param[in]     len   length of the string in bytes
+ * @param[in,out] ip    pointer to the index
+ * @return                              the Unicode character beginning at the index; or
+ *                                              #EOS if end of input is encountered
+ */
+utf32_t lb_get_next_char_utf8(
+                const utf8_t *s,
+                size_t len,
+                size_t *ip)
+{
+        utf8_t ch;
+        utf32_t res;
+        assert(*ip <= len);
+        if (*ip == len)
+                return EOS;
+        ch = s[*ip];
+        if (ch < 0xC2 || ch > 0xF4)
+        {       /* One-byte sequence, tail (should not occur), or invalid */
+                *ip += 1;
+                return ch;
+        }
+        else if (ch < 0xE0)
+        {       /* Two-byte sequence */
+                if (*ip + 2 > len)
+                        return EOS;
+                res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
+                *ip += 2;
+                return res;
+        }
+        else if (ch < 0xF0)
+        {       /* Three-byte sequence */
+                if (*ip + 3 > len)
+                        return EOS;
+                res = ((ch & 0x0F) << 12) +
+                          ((s[*ip + 1] & 0x3F) << 6) +
+                          ((s[*ip + 2] & 0x3F));
+                *ip += 3;
+                return res;
+        }
+        else
+        {       /* Four-byte sequence */
+                if (*ip + 4 > len)
+                        return EOS;
+                res = ((ch & 0x07) << 18) +
+                          ((s[*ip + 1] & 0x3F) << 12) +
+                          ((s[*ip + 2] & 0x3F) << 6) +
+                          ((s[*ip + 3] & 0x3F));
+                *ip += 4;
+                return res;
+        }
+}
+/**
+ * Gets the next Unicode character in a UTF-16 sequence.  The index will
+ * be advanced to the next complete character, unless the end of string
+ * is reached in the middle of a UTF-16 surrogate pair.
+ *
+ * @param[in]     s             input UTF-16 string
+ * @param[in]     len   length of the string in words
+ * @param[in,out] ip    pointer to the index
+ * @return                              the Unicode character beginning at the index; or
+ *                                              #EOS if end of input is encountered
+ */
+utf32_t lb_get_next_char_utf16(
+                const utf16_t *s,
+                size_t len,
+                size_t *ip)
+{
+        utf16_t ch;
+        assert(*ip <= len);
+        if (*ip == len)
+                return EOS;
+        ch = s[(*ip)++];
+        if (ch < 0xD800 || ch > 0xDBFF)
+        {       /* If the character is not a high surrogate */
+                return ch;
+        }
+        if (*ip == len)
+        {       /* If the input ends here (an error) */
+                --(*ip);
+                return EOS;
+        }
+        if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
+        {       /* If the next character is not the low surrogate (an error) */
+                return ch;
+        }
+        /* Return the constructed character and advance the index again */
+        return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
+}
+/**
+ * Gets the next Unicode character in a UTF-32 sequence.  The index will
+ * be advanced to the next character.
+ *
+ * @param[in]     s             input UTF-32 string
+ * @param[in]     len   length of the string in dwords
+ * @param[in,out] ip    pointer to the index
+ * @return                              the Unicode character beginning at the index; or
+ *                                              #EOS if end of input is encountered
+ */
+utf32_t lb_get_next_char_utf32(
+                const utf32_t *s,
+                size_t len,
+                size_t *ip)
+{
+        assert(*ip <= len);
+        if (*ip == len)
+                return EOS;
+        return s[(*ip)++];
+}
+/**
+ * Sets the line breaking information for a generic input string.
+ *
+ * @param[in]  s                        input string
+ * @param[in]  len                      length of the input
+ * @param[in]  lang                     language of the input
+ * @param[out] brks                     pointer to the output breaking data,
+ *                                                      containing #LINEBREAK_MUSTBREAK,
+ *                                                      #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
+ *                                                      or #LINEBREAK_INSIDEACHAR
+ * @param[in] get_next_char     function to get the next UTF-32 character
+ */
+void set_linebreaks(
+                const void *s,
+                size_t len,
+                const char *lang,
+                char *brks,
+                get_next_char_t get_next_char)
+{
+        utf32_t ch;
+        enum LineBreakClass lbcCur;
+        enum LineBreakClass lbcNew;
+        enum LineBreakClass lbcLast;
+        struct LineBreakProperties *lbpLang;
+        size_t posCur = 0;
+        size_t posLast = 0;
+        --posLast;      /* To be ++'d later */
+        ch = get_next_char(s, len, &posCur);
+        if (ch == EOS)
+                return;
+        lbpLang = get_lb_prop_lang(lang);
+        lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
+        lbcNew = LBP_Undefined;
+nextline:
+        /* Special treatment for the first character */
+        switch (lbcCur)
+        {
+        case LBP_LF:
+        case LBP_NL:
+                lbcCur = LBP_BK;
+                break;
+        case LBP_CB:
+                lbcCur = LBP_BA;
+                break;
+        case LBP_SP:
+                lbcCur = LBP_WJ;
+                break;
+        default:
+                break;
+        }
+        /* Process a line till an explicit break or end of string */
+        for (;;)
+        {
+                for (++posLast; posLast < posCur - 1; ++posLast)
+                {
+                        brks[posLast] = LINEBREAK_INSIDEACHAR;
+                }
+                assert(posLast == posCur - 1);
+                lbcLast = lbcNew;
+                ch = get_next_char(s, len, &posCur);
+                if (ch == EOS)
+                        break;
+                lbcNew = get_char_lb_class_lang(ch, lbpLang);
+                if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
+                {
+                        brks[posLast] = LINEBREAK_MUSTBREAK;
+                        lbcCur = resolve_lb_class(lbcNew, lang);
+                        goto nextline;
+                }
+                switch (lbcNew)
+                {
+                case LBP_SP:
+                        brks[posLast] = LINEBREAK_NOBREAK;
+                        continue;
+                case LBP_BK:
+                case LBP_LF:
+                case LBP_NL:
+                        brks[posLast] = LINEBREAK_NOBREAK;
+                        lbcCur = LBP_BK;
+                        continue;
+                case LBP_CR:
+                        brks[posLast] = LINEBREAK_NOBREAK;
+                        lbcCur = LBP_CR;
+                        continue;
+                case LBP_CB:
+                        brks[posLast] = LINEBREAK_ALLOWBREAK;
+                        lbcCur = LBP_BA;
+                        continue;
+                default:
+                        break;
+                }
+                lbcNew = resolve_lb_class(lbcNew, lang);
+                assert(lbcCur <= LBP_JT);
+                assert(lbcNew <= LBP_JT);
+                switch (baTable[lbcCur - 1][lbcNew - 1])
+                {
+                case DIR_BRK:
+                        brks[posLast] = LINEBREAK_ALLOWBREAK;
+                        break;
+                case CMI_BRK:
+                case IND_BRK:
+                        if (lbcLast == LBP_SP)
+                        {
+                                brks[posLast] = LINEBREAK_ALLOWBREAK;
+                        }
+                        else
+                        {
+                                brks[posLast] = LINEBREAK_NOBREAK;
+                        }
+                        break;
+                case CMP_BRK:
+                        brks[posLast] = LINEBREAK_NOBREAK;
+                        if (lbcLast != LBP_SP)
+                                continue;
+                        break;
+                case PRH_BRK:
+                        brks[posLast] = LINEBREAK_NOBREAK;
+                        break;
+                }
+                lbcCur = lbcNew;
+        }
+        assert(posLast == posCur - 1 && posCur <= len);
+        /* Break after the last character */
+        brks[posLast] = LINEBREAK_MUSTBREAK;
+        /* When the input contains incomplete sequences */
+        while (posCur < len)
+        {
+                brks[posCur++] = LINEBREAK_INSIDEACHAR;
+        }
+}
+/**
+ * Sets the line breaking information for a UTF-8 input string.
+ *
+ * @param[in]  s        input UTF-8 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                                      #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ */
+void set_linebreaks_utf8(
+                const utf8_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        set_linebreaks(s, len, lang, brks,
+                                   (get_next_char_t)lb_get_next_char_utf8);
+}
+/**
+ * Sets the line breaking information for a UTF-16 input string.
+ *
+ * @param[in]  s        input UTF-16 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                                      #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ */
+void set_linebreaks_utf16(
+                const utf16_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        set_linebreaks(s, len, lang, brks,
+                                   (get_next_char_t)lb_get_next_char_utf16);
+}
+/**
+ * Sets the line breaking information for a UTF-32 input string.
+ *
+ * @param[in]  s        input UTF-32 string
+ * @param[in]  len      length of the input
+ * @param[in]  lang     language of the input
+ * @param[out] brks     pointer to the output breaking data, containing
+ *                                      #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                                      #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ */
+void set_linebreaks_utf32(
+                const utf32_t *s,
+                size_t len,
+                const char *lang,
+                char *brks)
+{
+        set_linebreaks(s, len, lang, brks,
+                                   (get_next_char_t)lb_get_next_char_utf32);
+}
+/**
+ * Tells whether a line break can occur between two Unicode characters.
+ * This is a wrapper function to expose a simple interface.  Generally
+ * speaking, it is better to use #set_linebreaks_utf32 instead, since
+ * complicated cases involving combining marks, spaces, etc. cannot be
+ * correctly processed.
+ *
+ * @param char1 the first Unicode character
+ * @param char2 the second Unicode character
+ * @param lang  language of the input
+ * @return      one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                              #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ */
+int is_line_breakable(
+                utf32_t char1,
+                utf32_t char2,
+                const char* lang)
+{
+        utf32_t s[2];
+        char brks[2];
+        s[0] = char1;
+        s[1] = char2;
+        set_linebreaks_utf32(s, 2, lang, brks);
+        return brks[0];
+}