Second Life viewer sources 1.13.2.12

author: Jacek Antonelli 2008-08-15 23:44:46 -0500
committer: Jacek Antonelli 2008-08-15 23:44:46 -0500
commit: 38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4 (patch)
tree: adca584755d22ca041a2dbfc35d4eca01f70b32c /linden/indra/llcommon/llstring.cpp
parent: README.txt (diff)
download: meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.zip
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.gz
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.bz2
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.xz
1 files changed, 854 insertions, 0 deletions
diff --git a/linden/indra/llcommon/llstring.cpp b/linden/indra/llcommon/llstring.cpp
new file mode 100644
index 0000000..5cb42cc
--- /dev/null
+++ b/linden/indra/llcommon/llstring.cpp
@@ -0,0 +1,854 @@
+/** 
+ * @file llstring.cpp
+ * @brief String utility functions and the LLString class.
+ *
+ * Copyright (c) 2001-2007, Linden Research, Inc.
+ * 
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlife.com/developers/opensource/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at http://secondlife.com/developers/opensource/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ */
+#include "linden_common.h"
+#include "llstring.h"
+#include "llerror.h"
+/**
+ * Build a std::string from a C string that is allowed to be null.
+ *
+ * @param in  NUL-terminated C string, or a null pointer.
+ * @return    A copy of `in`, or an empty string when `in` is null.
+ */
+std::string ll_safe_string(const char* in)
+{
+        return in ? std::string(in) : std::string();
+}
+/**
+ * Convert one hexadecimal digit character to its 4-bit value.
+ *
+ * @param hex  Character expected to be in '0'-'9', 'a'-'f' or 'A'-'F'.
+ * @return     The digit's value (0-15). Any other character yields 0, which
+ *             is indistinguishable from a valid '0'.
+ */
+U8 hex_as_nybble(char hex)
+{
+        // Decimal digits map straight onto 0-9.
+        if (hex >= '0' && hex <= '9') return (U8)(hex - '0');
+
+        // Letters map onto 10-15 in either case.
+        if (hex >= 'a' && hex <= 'f') return (U8)(10 + hex - 'a');
+        if (hex >= 'A' && hex <= 'F') return (U8)(10 + hex - 'A');
+
+        // Not a hex digit at all.
+        return 0;
+}
+// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
+// for the Unicode reference implementation. The conversion code below does
+// not match it because it was written before that reference was found.
+/**
+ * Stream insertion for wide strings: the string is converted with
+ * wstring_to_utf8str() and the resulting UTF-8 bytes are written to `s`.
+ *
+ * @return The stream `s`, to allow chaining.
+ */
+std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
+{
+        return s << wstring_to_utf8str(wstr);
+}
+/**
+ * Round-trip a byte string through the wide-string representation and back:
+ * utf8str_to_wstring() decodes it, wstring_to_utf8str() re-encodes it.
+ *
+ * @param raw  Input bytes, interpreted as UTF-8 by the decoder.
+ * @return     The re-encoded UTF-8 string.
+ */
+std::string rawstr_to_utf8(const std::string& raw)
+{
+        return wstring_to_utf8str(utf8str_to_wstring(raw));
+}
+/**
+ * Encode a single 32-bit character as UTF-8.
+ *
+ * Values below 0x80000000 are written using the 1- to 6-byte forms. Larger
+ * values are reported with a warning and replaced by LL_UNKNOWN_CHAR.
+ *
+ * @param in_char   Character to encode.
+ * @param outchars  Destination buffer; up to 6 bytes are written and no NUL
+ *                  terminator is appended.
+ * @return          Number of bytes written to `outchars`.
+ */
+S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
+{
+        const U32 code = (U32)in_char;
+
+        // Plain ASCII is stored as-is in a single byte.
+        if (code < 0x80)
+        {
+                outchars[0] = (U8)code;
+                return 1;
+        }
+
+        // Pick the lead-byte marker and the number of continuation bytes.
+        U32 lead_mark;
+        S32 trailing;
+        if (code < 0x800)
+        {
+                lead_mark = 0xC0;
+                trailing = 1;
+        }
+        else if (code < 0x10000)
+        {
+                lead_mark = 0xE0;
+                trailing = 2;
+        }
+        else if (code < 0x200000)
+        {
+                lead_mark = 0xF0;
+                trailing = 3;
+        }
+        else if (code < 0x4000000)
+        {
+                lead_mark = 0xF8;
+                trailing = 4;
+        }
+        else if (code < 0x80000000)
+        {
+                lead_mark = 0xFC;
+                trailing = 5;
+        }
+        else
+        {
+                llwarns << "Invalid Unicode character " << code << "!" << llendl;
+                outchars[0] = LL_UNKNOWN_CHAR;
+                return 1;
+        }
+
+        char* cursor = outchars;
+        // The lead byte carries the marker plus the most significant payload bits.
+        *cursor++ = lead_mark | (code >> (6 * trailing));
+        // Each continuation byte is 10xxxxxx with the next 6 payload bits,
+        // most significant group first.
+        for (S32 shift = 6 * (trailing - 1); shift >= 0; shift -= 6)
+        {
+                *cursor++ = 0x80 | ((code >> shift) & 0x3F);
+        }
+        return cursor - outchars;
+}       
+/**
+ * Decode one character from a UTF-16 sequence.
+ *
+ * A lead surrogate (0xD800-0xDBFF) immediately followed by a trail surrogate
+ * (0xDC00-0xDFFF) is combined into a single code point at or above 0x10000.
+ * An unpaired surrogate (a lead without a following trail, or a trail on its
+ * own) decodes to LL_UNKNOWN_CHAR and consumes exactly one unit, so the
+ * following unit is decoded on its own by the next call.
+ *
+ * @param inchars  Pointer to the first UTF-16 unit. No length is passed, so
+ *                 when that unit is a lead surrogate the following unit is
+ *                 read as well; the caller must make sure it is readable
+ *                 (for example a terminator).
+ * @param outchar  Receives the decoded character.
+ * @return         Number of UTF-16 units consumed (1 or 2).
+ */
+S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
+{
+        const U16* base = inchars;
+        const U16 first = *inchars++;
+        llwchar char32 = (llwchar)first;
+        if ((first >= 0xD800) && (first <= 0xDBFF))
+        {
+                // Lead surrogate: only combine when a trail surrogate really follows.
+                const U16 second = *inchars;
+                if ((second >= 0xDC00) && (second <= 0xDFFF))
+                {
+                        ++inchars;
+                        char32 = ((llwchar)(first - 0xD800)) << 10;
+                        char32 += (llwchar)(second - 0xDC00) + 0x0010000UL;
+                }
+                else
+                {
+                        // Unpaired lead surrogate; leave the next unit for the next call.
+                        char32 = LL_UNKNOWN_CHAR;
+                }
+        }
+        else if ((first >= 0xDC00) && (first <= 0xDFFF))
+        {
+                // Trail surrogate with no preceding lead.
+                char32 = LL_UNKNOWN_CHAR;
+        }
+        *outchar = char32;
+        return inchars - base;
+}
+/**
+ * Convert one character from UTF-16 to UTF-8.
+ *
+ * @param inchars   Pointer to the first UTF-16 unit of the character.
+ * @param outchars  Destination for the UTF-8 bytes (not NUL-terminated here).
+ * @param nchars8p  Optional; when non-null receives the number of UTF-8
+ *                  bytes written.
+ * @return          Number of UTF-16 units consumed from `inchars`.
+ */
+S32 utf16chars_to_utf8chars(const U16* inchars, char* outchars, S32* nchars8p)
+{
+        // Step 1: UTF-16 -> 32-bit character.
+        llwchar decoded;
+        const S32 units_consumed = utf16chars_to_wchar(inchars, &decoded);
+
+        // Step 2: 32-bit character -> UTF-8.
+        const S32 bytes_written = wchar_to_utf8chars(decoded, outchars);
+
+        if (!nchars8p)
+        {
+                return units_consumed;
+        }
+        *nchars8p = bytes_written;
+        return units_consumed;
+}
+/**
+ * Convert the first `len` characters of a wide (32-bit) string to UTF-16.
+ *
+ * Characters above 0xFFFF are written as a surrogate pair; everything else
+ * is copied as a single unit.
+ *
+ * @param utf32str  Source string.
+ * @param len       Number of source characters to convert. Values larger
+ *                  than the string length are clamped to it so the source is
+ *                  never indexed out of range; values <= 0 give an empty result.
+ * @return          The UTF-16 encoding of the selected characters.
+ */
+llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
+{
+        const S32 available = (S32)utf32str.length();
+        if (len > available)
+        {
+                len = available;
+        }
+
+        llutf16string out;
+        for (S32 i = 0; i < len; ++i)
+        {
+                const U32 cur_char = utf32str[i];
+                if (cur_char > 0xFFFF)
+                {
+                        // 0xD7C0 == 0xD800 - (0x10000 >> 10): subtracts the 0x10000
+                        // offset and adds the lead-surrogate base in one step.
+                        out += (0xD7C0 + (cur_char >> 10));
+                        out += (0xDC00 | (cur_char & 0x3FF));
+                }
+                else
+                {
+                        out += cur_char;
+                }
+        }
+        return out;
+}
+/**
+ * Convert an entire wide (32-bit) string to UTF-16 by delegating to the
+ * length-taking overload with the string's own length.
+ */
+llutf16string wstring_to_utf16str(const LLWString &utf32str)
+{
+        return wstring_to_utf16str(utf32str, (S32)utf32str.length());
+}
+/**
+ * Convert a UTF-8 string to UTF-16, going through the wide-string form:
+ * utf8str_to_wstring() followed by wstring_to_utf16str().
+ */
+llutf16string utf8str_to_utf16str ( const LLString& utf8str )
+{
+        return wstring_to_utf16str ( utf8str_to_wstring ( utf8str ) );
+}
+/**
+ * Convert the first `len` UTF-16 units of a string to a wide (32-bit) string.
+ *
+ * @param utf16str  Source UTF-16 string.
+ * @param len       Number of UTF-16 units to convert. Values larger than the
+ *                  string length are clamped to it; values <= 0 (or an empty
+ *                  source) give an empty result.
+ * @return          The decoded wide string.
+ */
+LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
+{
+        LLWString wout;
+
+        const S32 available = (S32)utf16str.length();
+        if (len > available)
+        {
+                len = available;
+        }
+        if (len <= 0)
+        {
+                // Nothing to decode; also avoids dereferencing begin() of an
+                // empty string below.
+                return wout;
+        }
+
+        // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
+        const U16* chars16 = &(*(utf16str.begin()));
+        S32 i = 0;
+        while (i < len)
+        {
+                llwchar cur_char;
+                // Advances by however many units the character occupied.
+                i += utf16chars_to_wchar(chars16+i, &cur_char);
+                wout += cur_char;
+        }
+        return wout;
+}
+/**
+ * Convert an entire UTF-16 string to a wide (32-bit) string by delegating to
+ * the length-taking overload with the string's own length.
+ */
+LLWString utf16str_to_wstring(const llutf16string &utf16str)
+{
+        return utf16str_to_wstring(utf16str, (S32)utf16str.length());
+}
+/**
+ * Number of bytes the UTF-8 encoding of one character occupies, using the
+ * same 1- to 6-byte thresholds as wchar_to_utf8chars().
+ *
+ * @param wc  Character to measure.
+ * @return    A value from 1 to 6.
+ */
+S32 wchar_utf8_length(const llwchar wc)
+{
+        // This first test also catches negative values (if llwchar is a
+        // signed type), which are technically invalid.
+        if (wc < 0x80)      return 1;
+        if (wc < 0x800)     return 2;
+        if (wc < 0x10000)   return 3;
+        if (wc < 0x200000)  return 4;
+        if (wc < 0x4000000) return 5;
+        return 6;
+}
+/**
+ * Total number of bytes needed to encode a wide string as UTF-8: the sum of
+ * wchar_utf8_length() over every character.
+ */
+S32 wstring_utf8_length(const LLWString& wstr)
+{
+        S32 total_bytes = 0;
+        S32 idx = 0;
+        while (idx < (S32)wstr.length())
+        {
+                total_bytes += wchar_utf8_length(wstr[idx]);
+                ++idx;
+        }
+        return total_bytes;
+}
+/**
+ * Decode the first `len` bytes of a UTF-8 string into a wide (32-bit) string.
+ *
+ * Lead bytes for 2- to 6-byte sequences are recognised. The following inputs
+ * each produce a single LL_UNKNOWN_CHAR:
+ *   - a byte that is neither ASCII nor a valid lead byte,
+ *   - a sequence cut short by a non-continuation byte or by the end of the
+ *     input (decoding resumes at the offending byte),
+ *   - an overlong encoding (a value that would fit in a shorter sequence,
+ *     which includes a multi-byte encoding of NUL).
+ *
+ * @param utf8str  Source bytes.
+ * @param len      Number of bytes to decode. Values larger than the string
+ *                 length are clamped to it; no byte at or beyond `len` is read.
+ * @return         The decoded wide string.
+ */
+LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
+{
+        LLWString wout;
+
+        const S32 available = (S32)utf8str.length();
+        if (len > available)
+        {
+                len = available;
+        }
+
+        S32 i = 0;
+        while (i < len)
+        {
+                llwchar unichar;
+                U8 cur_char = utf8str[i];
+
+                if (cur_char < 0x80)
+                {
+                        // Ascii character, just add it
+                        unichar = cur_char;
+                }
+                else
+                {
+                        // Classify the lead byte: keep its payload bits and note how
+                        // many continuation bytes must follow.
+                        S32 cont_bytes = 0;
+                        if ((cur_char >> 5) == 0x6)                     // Two byte UTF8 -> 1 UTF32
+                        {
+                                unichar = (0x1F&cur_char);
+                                cont_bytes = 1;
+                        }
+                        else if ((cur_char >> 4) == 0xe)        // Three byte UTF8 -> 1 UTF32
+                        {
+                                unichar = (0x0F&cur_char);
+                                cont_bytes = 2;
+                        }
+                        else if ((cur_char >> 3) == 0x1e)       // Four byte UTF8 -> 1 UTF32
+                        {
+                                unichar = (0x07&cur_char);
+                                cont_bytes = 3;
+                        }
+                        else if ((cur_char >> 2) == 0x3e)       // Five byte UTF8 -> 1 UTF32
+                        {
+                                unichar = (0x03&cur_char);
+                                cont_bytes = 4;
+                        }
+                        else if ((cur_char >> 1) == 0x7e)       // Six byte UTF8 -> 1 UTF32
+                        {
+                                unichar = (0x01&cur_char);
+                                cont_bytes = 5;
+                        }
+                        else
+                        {
+                                // Stray continuation byte or invalid lead byte.
+                                wout += LL_UNKNOWN_CHAR;
+                                ++i;
+                                continue;
+                        }
+
+                        // Consume the continuation bytes. The bounds check comes before
+                        // the read so a sequence truncated by `len` never touches bytes
+                        // outside the requested range.
+                        bool well_formed = true;
+                        for (S32 remaining = cont_bytes; remaining > 0; --remaining)
+                        {
+                                if (i + 1 >= len)
+                                {
+                                        // Sequence runs past the end of the input.
+                                        well_formed = false;
+                                        break;
+                                }
+                                cur_char = utf8str[i + 1];
+                                if ( (cur_char >> 6) != 0x2 )
+                                {
+                                        // Malformed sequence - leave i on the last accepted byte
+                                        // so the offending byte is looked at as a new char.
+                                        well_formed = false;
+                                        break;
+                                }
+                                ++i;
+                                unichar <<= 6;
+                                unichar += (0x3F&cur_char);
+                        }
+
+                        // Handle malformed, overlong and NULL characters
+                        if ( !well_formed
+                                || ((cont_bytes == 1) && (unichar < 0x80))
+                                || ((cont_bytes == 2) && (unichar < 0x800))
+                                || ((cont_bytes == 3) && (unichar < 0x10000))
+                                || ((cont_bytes == 4) && (unichar < 0x200000))
+                                || ((cont_bytes == 5) && (unichar < 0x4000000)) )
+                        {
+                                unichar = LL_UNKNOWN_CHAR;
+                        }
+                }
+
+                wout += unichar;
+                ++i;
+        }
+        return wout;
+}
+/**
+ * Decode an entire UTF-8 string into a wide (32-bit) string by delegating to
+ * the length-taking overload with the string's own length.
+ */
+LLWString utf8str_to_wstring(const std::string& utf8str)
+{
+        return utf8str_to_wstring(utf8str, (S32)utf8str.length());
+}
+/**
+ * Encode the first `len` characters of a wide (32-bit) string as UTF-8.
+ *
+ * Each character is encoded with wchar_to_utf8chars() into a small scratch
+ * buffer and appended as a C string, so a zero-valued character contributes
+ * nothing to the output.
+ *
+ * @param utf32str  Source string.
+ * @param len       Number of source characters to encode. Values larger than
+ *                  the string length are clamped to it so the source is never
+ *                  indexed out of range; values <= 0 give an empty result.
+ * @return          The UTF-8 encoding of the selected characters.
+ */
+std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
+{
+        const S32 available = (S32)utf32str.length();
+        if (len > available)
+        {
+                len = available;
+        }
+
+        std::string out;
+        // Room for the longest (6-byte) encoding plus the terminator.
+        char tchars[8];         /* Flawfinder: ignore */
+        for (S32 i = 0; i < len; ++i)
+        {
+                const S32 n = wchar_to_utf8chars(utf32str[i], tchars);
+                tchars[n] = 0;
+                out += tchars;
+        }
+        return out;
+}
+/**
+ * Encode an entire wide (32-bit) string as UTF-8 by delegating to the
+ * length-taking overload with the string's own length.
+ */
+std::string wstring_to_utf8str(const LLWString& utf32str)
+{
+        return wstring_to_utf8str(utf32str, (S32)utf32str.length());
+}
+/**
+ * Convert an entire UTF-16 string to UTF-8, going through the wide-string
+ * form: utf16str_to_wstring() followed by wstring_to_utf8str().
+ */
+std::string utf16str_to_utf8str(const llutf16string& utf16str)
+{
+        const LLWString wide = utf16str_to_wstring(utf16str);
+        return wstring_to_utf8str(wide);
+}
+/**
+ * Convert the first `len` UTF-16 units of a string to UTF-8.
+ *
+ * @param utf16str  Source UTF-16 string.
+ * @param len       Number of UTF-16 units to convert.
+ * @return          The UTF-8 encoding of the decoded characters.
+ */
+std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
+{
+        const LLWString wide = utf16str_to_wstring(utf16str, len);
+        // `len` counts UTF-16 units. The decoded string is shorter whenever a
+        // surrogate pair collapsed into one character, so the second stage must
+        // use the decoded length rather than `len` to stay inside `wide`.
+        return wstring_to_utf8str(wide, (S32)wide.length());
+}
+//LLWString wstring_truncate(const LLWString &wstr, const S32 max_len)
+//{
+//      return wstr.substr(0, llmin((S32)wstr.length(), max_len));
+//}
+//
+//
+//LLWString wstring_trim(const LLWString &wstr)
+//{
+//      LLWString outstr;
+//      outstr = wstring_trimhead(wstr);
+//      outstr = wstring_trimtail(outstr);
+//      return outstr;
+//}
+//
+//
+//LLWString wstring_trimhead(const LLWString &wstr)
+//{
+//      if(wstr.empty())
+//      {
+//              return wstr;
+//      }
+//
+//    S32 i = 0;
+//      while((i < (S32)wstr.length()) && iswspace(wstr[i]))
+//      {
+//              i++;
+//      }
+//      return wstr.substr(i, wstr.length() - i);
+//}
+//
+//
+//LLWString wstring_trimtail(const LLWString &wstr)
+//{                     
+//      if(wstr.empty())
+//      {
+//              return wstr;
+//      }
+//
+//      S32 len = (S32)wstr.length();
+//
+//      S32 i = len - 1;
+//      while (i >= 0 && iswspace(wstr[i]))
+//      {
+//              i--;
+//      }
+//
+//      if (i >= 0)
+//      {
+//              return wstr.substr(0, i + 1);
+//      }
+//      return wstr;
+//}
+//
+//
+//LLWString wstring_copyinto(const LLWString &dest, const LLWString &src, const S32 insert_offset)
+//{
+//      llassert( insert_offset <= (S32)dest.length() );
+//
+//      LLWString out_str = dest.substr(0, insert_offset);
+//      out_str += src;
+//      LLWString tail = dest.substr(insert_offset);
+//      out_str += tail;
+//
+//      return out_str;
+//}
+//LLWString wstring_detabify(const LLWString &wstr, const S32 num_spaces)
+//{
+//      LLWString out_str;
+//      // Replace tabs with spaces
+//      for (S32 i = 0; i < (S32)wstr.length(); i++)
+//      {
+//              if (wstr[i] == '\t')
+//              {
+//                      for (S32 j = 0; j < num_spaces; j++)
+//                              out_str += ' ';
+//              }
+//              else
+//              {
+//                      out_str += wstr[i];
+//              }
+//      }
+//      return out_str;
+//}
+//LLWString wstring_makeASCII(const LLWString &wstr)
+//{
+//      // Replace non-ASCII chars with replace_char
+//      LLWString out_str = wstr;
+//      for (S32 i = 0; i < (S32)out_str.length(); i++)
+//      {
+//              if (out_str[i] > 0x7f)
+//              {
+//                      out_str[i] = LL_UNKNOWN_CHAR;
+//              }
+//      }
+//      return out_str;
+//}
+//LLWString wstring_substChar(const LLWString &wstr, const llwchar target_char, const llwchar replace_char)
+//{
+//      // Replace all occurrences of target_char with replace_char
+//      LLWString out_str = wstr;
+//      for (S32 i = 0; i < (S32)out_str.length(); i++)
+//      {
+//              if (out_str[i] == target_char)
+//              {
+//                      out_str[i] = replace_char;
+//              }
+//      }
+//      return out_str;
+//}
+//
+//
+//LLWString wstring_tolower(const LLWString &wstr)
+//{
+//      LLWString out_str = wstr;
+//      for (S32 i = 0; i < (S32)out_str.length(); i++)
+//      {
+//              out_str[i] = towlower(out_str[i]);
+//      }
+//      return out_str;
+//}
+//
+//
+//LLWString wstring_convert_to_lf(const LLWString &wstr)
+//{
+//      const llwchar CR = 13;
+//      // Remove carriage returns from string with CRLF
+//      LLWString out_str;
+//
+//      for (S32 i = 0; i < (S32)wstr.length(); i++)
+//      {
+//              if (wstr[i] != CR)
+//              {
+//                      out_str += wstr[i];
+//              }
+//      }
+//      return out_str;
+//}
+//
+//
+//LLWString wstring_convert_to_crlf(const LLWString &wstr)
+//{
+//      const llwchar LF = 10;
+//      const llwchar CR = 13;
+//      // Insert a carriage return before every line feed
+//      LLWString out_str;
+//
+//      for (S32 i = 0; i < (S32)wstr.length(); i++)
+//      {
+//              if (wstr[i] == LF)
+//              {
+//                      out_str += CR;
+//              }
+//              out_str += wstr[i];
+//      }
+//      return out_str;
+//}
+//S32   wstring_compare_insensitive(const LLWString &lhs, const LLWString &rhs)
+//{
+//
+//      if (lhs == rhs)
+//      {
+//              return 0;
+//      }
+//
+//      if (lhs.empty())
+//      {
+//              return rhs.empty() ? 0 : 1;
+//      }
+//
+//      if (rhs.empty())
+//      {
+//              return -1;
+//      }
+//
+//#ifdef LL_LINUX
+//      // doesn't work because gcc 2.95 doesn't correctly implement c_str().  Sigh...
+//      llerrs << "wstring_compare_insensitive doesn't work on Linux!" << llendl;
+//      return 0;
+//#else
+//      LLWString lhs_lower = lhs;
+//      LLWString::toLower(lhs_lower);
+//      std::string lhs_lower = wstring_to_utf8str(lhs_lower);
+//      LLWString rhs_lower = lhs;
+//      LLWString::toLower(rhs_lower);
+//      std::string rhs_lower = wstring_to_utf8str(rhs_lower);
+//
+//      return strcmp(lhs_lower.c_str(), rhs_lower.c_str());
+//#endif
+//}
+/**
+ * Trim a UTF-8 string: decode to a wide string, apply LLWString::trim() in
+ * place, and encode the result back to UTF-8.
+ */
+std::string utf8str_trim(const std::string& utf8str)
+{
+        LLWString wide(utf8str_to_wstring(utf8str));
+        LLWString::trim(wide);
+        return wstring_to_utf8str(wide);
+}
+/**
+ * Lower-case a UTF-8 string: decode to a wide string, apply
+ * LLWString::toLower() in place, and encode the result back to UTF-8.
+ */
+std::string utf8str_tolower(const std::string& utf8str)
+{
+        LLWString wide(utf8str_to_wstring(utf8str));
+        LLWString::toLower(wide);
+        return wstring_to_utf8str(wide);
+}
+/**
+ * Compare two UTF-8 strings via LLWString::compareInsensitive().
+ *
+ * Both operands are decoded to wide strings first, so the comparison works
+ * on whole characters rather than on UTF-8 bytes.
+ *
+ * @return Whatever LLWString::compareInsensitive() returns for the decoded
+ *         operands.
+ */
+S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
+{
+        const LLWString left  = utf8str_to_wstring(lhs);
+        const LLWString right = utf8str_to_wstring(rhs);
+        return LLWString::compareInsensitive(left.c_str(), right.c_str());
+}
+/**
+ * Truncate a UTF-8 string to at most `max_len` bytes without cutting a
+ * multi-byte character in half.
+ *
+ * @param utf8str  Source UTF-8 string.
+ * @param max_len  Maximum number of bytes to keep. Zero or negative values
+ *                 give an empty string.
+ * @return         `utf8str` itself when it already fits; otherwise a prefix
+ *                 of at most `max_len` bytes ending on a character boundary.
+ */
+std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
+{
+        // A negative limit must not be used as an index below.
+        if (max_len <= 0)
+        {
+                return std::string();
+        }
+        if ((S32)utf8str.length() <= max_len)
+        {
+                return utf8str;
+        }
+
+        // `cut` is the index of the first byte to drop. If that byte is a
+        // continuation byte (bit pattern 10xxxxxx) the cut would fall inside a
+        // multi-byte character, so move backwards to that character's lead byte.
+        // Stopping at 0 keeps a bogus string from running off the front.
+        S32 cut = max_len;
+        while ((cut > 0) && (0x80 == (0xc0 & utf8str[cut])))
+        {
+                --cut;
+        }
+
+        // Keep bytes [0, cut); the byte at `cut` and everything after is dropped.
+        return utf8str.substr(0, cut);
+}
+/**
+ * Replace characters in a UTF-8 string: decode to a wide string, apply
+ * LLWString::replaceChar() with the given pair, and encode back to UTF-8.
+ *
+ * @param utf8str       Source UTF-8 string.
+ * @param target_char   Character to look for.
+ * @param replace_char  Character to put in its place.
+ */
+std::string utf8str_substChar(
+        const std::string& utf8str,
+        const llwchar target_char,
+        const llwchar replace_char)
+{
+        LLWString wide(utf8str_to_wstring(utf8str));
+        LLWString::replaceChar(wide, target_char, replace_char);
+        return wstring_to_utf8str(wide);
+}
+/**
+ * Decode a UTF-8 string to a wide string, apply LLWString::_makeASCII() in
+ * place, and encode the result back to UTF-8.
+ */
+std::string utf8str_makeASCII(const std::string& utf8str)
+{
+        LLWString wide(utf8str_to_wstring(utf8str));
+        LLWString::_makeASCII(wide);
+        return wstring_to_utf8str(wide);
+}
+/**
+ * Return a copy of a byte string in which every byte above 0x7f has been
+ * replaced by LL_UNKNOWN_CHAR. The replacement is per byte, so the result
+ * has the same length as the input.
+ */
+std::string mbcsstring_makeASCII(const std::string& wstr)
+{
+        std::string ascii(wstr);
+        S32 pos = 0;
+        while (pos < (S32)ascii.length())
+        {
+                // Bytes with the high bit set are outside the ASCII range.
+                const bool is_ascii = ((U8)ascii[pos] <= 0x7f);
+                if (!is_ascii)
+                {
+                        ascii[pos] = LL_UNKNOWN_CHAR;
+                }
+                ++pos;
+        }
+        return ascii;
+}
+/**
+ * Compare two NUL-terminated wide strings for ordering.
+ *
+ * On Windows both operands are converted to UTF-8 and compared with
+ * strcmp(); elsewhere wcscoll() is used directly.
+ *
+ * @return The result of the underlying strcmp()/wcscoll() call.
+ */
+S32     LLStringOps::collate(const llwchar* a, const llwchar* b)
+{ 
+        #if LL_WINDOWS
+                // in Windows, wide string functions operate on 16-bit strings,
+                // not the proper 32 bit wide string
+                const std::string a_utf8 = wstring_to_utf8str(LLWString(a));
+                const std::string b_utf8 = wstring_to_utf8str(LLWString(b));
+                return strcmp(a_utf8.c_str(), b_utf8.c_str());
+        #else
+                return wcscoll(a, b);
+        #endif
+}
+/**
+ * In-place character filters for narrow and wide strings.
+ *
+ * The narrow (char) overloads compare plain `char` values against 0x20, so
+ * whether bytes of 0x80 and above count as "below 0x20" depends on whether
+ * `char` is signed on the target platform. The wide overloads replace
+ * everything outside the closed range [0x20, 0x7f]; note that 0x7f itself
+ * is kept.
+ */
+namespace LLStringFn
+{
+        // Replace every char that compares below 0x20 with `replacement`.
+        void replace_nonprintable(std::basic_string<char>& string, char replacement)
+        {
+                const char MIN = 0x20;
+                typedef std::basic_string<char>::iterator iter_t;
+                for(iter_t it = string.begin(); it != string.end(); ++it)
+                {
+                        if(*it < MIN)
+                        {
+                                *it = replacement;
+                        }
+                }
+        }
+
+        // Replace every wide char outside [0x20, 0x7f] with `replacement`.
+        void replace_nonprintable(
+                std::basic_string<llwchar>& string,
+                llwchar replacement)
+        {
+                const llwchar MIN = 0x20;
+                const llwchar MAX = 0x7f;
+                typedef std::basic_string<llwchar>::iterator iter_t;
+                for(iter_t it = string.begin(); it != string.end(); ++it)
+                {
+                        const bool keep = (*it >= MIN) && (*it <= MAX);
+                        if(!keep)
+                        {
+                                *it = replacement;
+                        }
+                }
+        }
+
+        // As the narrow replace_nonprintable(), and additionally replaces the
+        // pipe character '|' (0x7c).
+        void replace_nonprintable_and_pipe(std::basic_string<char>& str,
+                                                                           char replacement)
+        {
+                const char MIN  = 0x20;
+                const char PIPE = 0x7c;
+                typedef std::basic_string<char>::iterator iter_t;
+                for(iter_t it = str.begin(); it != str.end(); ++it)
+                {
+                        const bool keep = (*it >= MIN) && (*it != PIPE);
+                        if(!keep)
+                        {
+                                *it = replacement;
+                        }
+                }
+        }
+
+        // As the wide replace_nonprintable(), and additionally replaces the
+        // pipe character '|' (0x7c).
+        void replace_nonprintable_and_pipe(std::basic_string<llwchar>& str,
+                                                                           llwchar replacement)
+        {
+                const llwchar MIN  = 0x20;
+                const llwchar MAX  = 0x7f;
+                const llwchar PIPE = 0x7c;
+                typedef std::basic_string<llwchar>::iterator iter_t;
+                for(iter_t it = str.begin(); it != str.end(); ++it)
+                {
+                        const bool keep = (*it >= MIN) && (*it <= MAX) && (*it != PIPE);
+                        if(!keep)
+                        {
+                                *it = replacement;
+                        }
+                }
+        }
+}
+////////////////////////////////////////////////////////////
+// Testing
+#ifdef _DEBUG
+template<class T> 
+void LLStringBase<T>::testHarness() // Debug-only self-test: runs a fixed series of llassert checks against LLString.
+{
+        LLString s1; // default-constructed string
+        
+        llassert( s1.c_str() == NULL ); // NOTE(review): expects a NULL c_str() for an empty string; would not hold for a std::string-backed class - confirm
+        llassert( s1.size() == 0 );
+        llassert( s1.empty() );
+        
+        LLString s2( "hello"); // construction from a C string
+        llassert( !strcmp( s2.c_str(), "hello" ) );
+        llassert( s2.size() == 5 ); 
+        llassert( !s2.empty() );
+        LLString s3( s2 ); // copy construction
+        llassert( "hello" == s2 ); // comparisons with the literal on either side
+        llassert( s2 == "hello" );
+        llassert( s2 > "gello" );
+        llassert( "gello" < s2 );
+        llassert( "gello" != s2 );
+        llassert( s2 != "gello" );
+        LLString s4 = s2;
+        llassert( !s4.empty() );
+        s4.empty(); // NOTE(review): return value discarded, yet the next assert expects s4 to be empty - presumably empty() once cleared the string; confirm
+        llassert( s4.empty() );
+        
+        LLString s5(""); // construction from an empty C string
+        llassert( s5.empty() );
+        
+        llassert( isValidIndex(s5, 0) ); // index 0 is reported valid even for an empty string
+        llassert( !isValidIndex(s5, 1) );
+        
+        s3 = s2; // assignment from another string and from a literal
+        s4 = "hello again";
+        
+        s4 += "!"; // append a literal, then append the string to itself
+        s4 += s4;
+        llassert( s4 == "hello again!hello again!" );
+        
+        
+        LLString s6 = s2 + " " + s2; // concatenation
+        LLString s7 = s6;
+        llassert( s6 == s7 ); // relational operators between two equal strings
+        llassert( !( s6 != s7) );
+        llassert( !(s6 < s7) );
+        llassert( !(s6 > s7) );
+        
+        llassert( !(s6 == "hi"));
+        llassert( s6 == "hello hello");
+        llassert( s6 < "hi");
+        
+        llassert( s6[1] == 'e' ); // element read and write through operator[]
+        s6[1] = 'f';
+        llassert( s6[1] == 'f' );
+        
+        s2.erase( 4, 1 ); // erase(position, count)
+        llassert( s2 == "hell");
+        s2.insert( 0, 'y' ); // NOTE(review): insert(position, single char) is not a std::string overload - confirm LLStringBase provides it
+        llassert( s2 == "yhell");
+        s2.erase( 1, 3 );
+        llassert( s2 == "yl");
+        s2.insert( 1, "awn, don't yel"); // insert(position, C string)
+        llassert( s2 == "yawn, don't yell");
+        
+        LLString s8 = s2.substr( 6, 5 ); // substr(position, count)
+        llassert( s8 == "don't"  );
+        
+        LLString s9 = "   \t\ntest  \t\t\n  ";
+        trim(s9); // expected to strip leading and trailing spaces, tabs and newlines
+        llassert( s9 == "test"  );
+        s8 = "abc123&*(ABC";
+        s9 = s8;
+        toUpper(s9); // case conversion leaves digits and punctuation untouched
+        llassert( s9 == "ABC123&*(ABC"  );
+        s9 = s8;
+        toLower(s9);
+        llassert( s9 == "abc123&*(abc"  );
+        LLString s10( 10, 'x' ); // fill constructor (count, char)
+        llassert( s10 == "xxxxxxxxxx" );
+        LLString s11( "monkey in the middle", 7, 2 ); // substring constructor (source, position, count)
+        llassert( s11 == "in" );
+        LLString s12;  // starts empty; append a C string
+        s12 += "foo";
+        llassert( s12 == "foo" );
+        LLString s13;  // starts empty; append a single char
+        s13 += 'f';
+        llassert( s13 == "f" );
+}
+#endif  // _DEBUG
author	Jacek Antonelli	2008-08-15 23:44:46 -0500
committer	Jacek Antonelli	2008-08-15 23:44:46 -0500
commit	38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4 (patch)
tree	adca584755d22ca041a2dbfc35d4eca01f70b32c /linden/indra/llcommon/llstring.cpp
parent	README.txt (diff)
download	meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.zip meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.gz meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.bz2 meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.xz

diff --git a/linden/indra/llcommon/llstring.cpp b/linden/indra/llcommon/llstring.cpp new file mode 100644 index 0000000..5cb42cc --- /dev/null +++ b/linden/indra/llcommon/llstring.cpp
@@ -0,0 +1,854 @@
	1	/**
	2	* @file llstring.cpp
	3	* @brief String utility functions and the LLString class.
	4	*
	5	* Copyright (c) 2001-2007, Linden Research, Inc.
	6	*
	7	* The source code in this file ("Source Code") is provided by Linden Lab
	8	* to you under the terms of the GNU General Public License, version 2.0
	9	* ("GPL"), unless you have obtained a separate licensing agreement
	10	* ("Other License"), formally executed by you and Linden Lab. Terms of
	11	* the GPL can be found in doc/GPL-license.txt in this distribution, or
	12	* online at http://secondlife.com/developers/opensource/gplv2
	13	*
	14	* There are special exceptions to the terms and conditions of the GPL as
	15	* it is applied to this Source Code. View the full text of the exception
	16	* in the file doc/FLOSS-exception.txt in this software distribution, or
	17	* online at http://secondlife.com/developers/opensource/flossexception
	18	*
	19	* By copying, modifying or distributing this software, you acknowledge
	20	* that you have read and understood your obligations described above,
	21	* and agree to abide by those obligations.
	22	*
	23	* ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
	24	* WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
	25	* COMPLETENESS OR PERFORMANCE.
	26	*/
	27
	28	#include "linden_common.h"
	29
	30	#include "llstring.h"
	31	#include "llerror.h"
	32
	// Build a std::string from a C string pointer that may be NULL.
	// A NULL pointer yields an empty string instead of undefined behaviour.
	std::string ll_safe_string(const char* in)
	{
		return in ? std::string(in) : std::string();
	}
	38
	39	U8 hex_as_nybble(char hex)
	40	{
	41	if((hex >= '0') && (hex <= '9'))
	42	{
	43	return (U8)(hex - '0');
	44	}
	45	else if((hex >= 'a') && (hex <='f'))
	46	{
	47	return (U8)(10 + hex - 'a');
	48	}
	49	else if((hex >= 'A') && (hex <='F'))
	50	{
	51	return (U8)(10 + hex - 'A');
	52	}
	53	return 0; // uh - oh, not hex any more...
	54	}
	55
	56
	57	// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
	58	// for the Unicode implementation - this doesn't match because it was written before finding
	59	// it.
	60
	61
	62	std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
	63	{
	64	std::string utf8_str = wstring_to_utf8str(wstr);
	65	s << utf8_str;
	66	return s;
	67	}
	68
	69	std::string rawstr_to_utf8(const std::string& raw)
	70	{
	71	LLWString wstr(utf8str_to_wstring(raw));
	72	return wstring_to_utf8str(wstr);
	73	}
	74
	75	S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
	76	{
	77	U32 cur_char = (U32)in_char;
	78	char* base = outchars;
	79	if (cur_char < 0x80)
	80	{
	81	*outchars++ = (U8)cur_char;
	82	}
	83	else if (cur_char < 0x800)
	84	{
	85	*outchars++ = 0xC0 \| (cur_char >> 6);
	86	*outchars++ = 0x80 \| (cur_char & 0x3F);
	87	}
	88	else if (cur_char < 0x10000)
	89	{
	90	*outchars++ = 0xE0 \| (cur_char >> 12);
	91	*outchars++ = 0x80 \| ((cur_char >> 6) & 0x3F);
	92	*outchars++ = 0x80 \| (cur_char & 0x3F);
	93	}
	94	else if (cur_char < 0x200000)
	95	{
	96	*outchars++ = 0xF0 \| (cur_char >> 18);
	97	*outchars++ = 0x80 \| ((cur_char >> 12) & 0x3F);
	98	*outchars++ = 0x80 \| ((cur_char >> 6) & 0x3F);
	99	*outchars++ = 0x80 \| cur_char & 0x3F;
	100	}
	101	else if (cur_char < 0x4000000)
	102	{
	103	*outchars++ = 0xF8 \| (cur_char >> 24);
	104	*outchars++ = 0x80 \| ((cur_char >> 18) & 0x3F);
	105	*outchars++ = 0x80 \| ((cur_char >> 12) & 0x3F);
	106	*outchars++ = 0x80 \| ((cur_char >> 6) & 0x3F);
	107	*outchars++ = 0x80 \| cur_char & 0x3F;
	108	}
	109	else if (cur_char < 0x80000000)
	110	{
	111	*outchars++ = 0xFC \| (cur_char >> 30);
	112	*outchars++ = 0x80 \| ((cur_char >> 24) & 0x3F);
	113	*outchars++ = 0x80 \| ((cur_char >> 18) & 0x3F);
	114	*outchars++ = 0x80 \| ((cur_char >> 12) & 0x3F);
	115	*outchars++ = 0x80 \| ((cur_char >> 6) & 0x3F);
	116	*outchars++ = 0x80 \| cur_char & 0x3F;
	117	}
	118	else
	119	{
	120	llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
	121	*outchars++ = LL_UNKNOWN_CHAR;
	122	}
	123	return outchars - base;
	124	}
	125
	126	S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
	127	{
	128	const U16* base = inchars;
	129	U16 cur_char = *inchars++;
	130	llwchar char32 = cur_char;
	131	if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
	132	{
	133	// Surrogates
	134	char32 = ((llwchar)(cur_char - 0xD800)) << 10;
	135	cur_char = *inchars++;
	136	char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
	137	}
	138	else
	139	{
	140	char32 = (llwchar)cur_char;
	141	}
	142	*outchar = char32;
	143	return inchars - base;
	144	}
	145
	146	S32 utf16chars_to_utf8chars(const U16* inchars, char* outchars, S32* nchars8p)
	147	{
	148	// Get 32 bit char32
	149	llwchar char32;
	150	S32 nchars16 = utf16chars_to_wchar(inchars, &char32);
	151	// Convert to utf8
	152	S32 nchars8 = wchar_to_utf8chars(char32, outchars);
	153	if (nchars8p)
	154	{
	155	*nchars8p = nchars8;
	156	}
	157	return nchars16;
	158	}
	159
	160	llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
	161	{
	162	llutf16string out;
	163
	164	S32 i = 0;
	165	while (i < len)
	166	{
	167	U32 cur_char = utf32str[i];
	168	if (cur_char > 0xFFFF)
	169	{
	170	out += (0xD7C0 + (cur_char >> 10));
	171	out += (0xDC00 \| (cur_char & 0x3FF));
	172	}
	173	else
	174	{
	175	out += cur_char;
	176	}
	177	i++;
	178	}
	179	return out;
	180	}
	181
	182	llutf16string wstring_to_utf16str(const LLWString &utf32str)
	183	{
	184	const S32 len = (S32)utf32str.length();
	185	return wstring_to_utf16str(utf32str, len);
	186	}
	187
	188	llutf16string utf8str_to_utf16str ( const LLString& utf8str )
	189	{
	190	LLWString wstr = utf8str_to_wstring ( utf8str );
	191	return wstring_to_utf16str ( wstr );
	192	}
	193
	194
	195	LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
	196	{
	197	LLWString wout;
	198
	199	S32 i = 0;
	200	// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
	201	const U16* chars16 = &(*(utf16str.begin()));
	202	while (i < len)
	203	{
	204	llwchar cur_char;
	205	i += utf16chars_to_wchar(chars16+i, &cur_char);
	206	wout += cur_char;
	207	}
	208	return wout;
	209	}
	210
	211	LLWString utf16str_to_wstring(const llutf16string &utf16str)
	212	{
	213	const S32 len = (S32)utf16str.length();
	214	return utf16str_to_wstring(utf16str, len);
	215	}
	216
	217	S32 wchar_utf8_length(const llwchar wc)
	218	{
	219	if (wc < 0x80)
	220	{
	221	// This case will also catch negative values which are
	222	// technically invalid.
	223	return 1;
	224	}
	225	else if (wc < 0x800)
	226	{
	227	return 2;
	228	}
	229	else if (wc < 0x10000)
	230	{
	231	return 3;
	232	}
	233	else if (wc < 0x200000)
	234	{
	235	return 4;
	236	}
	237	else if (wc < 0x4000000)
	238	{
	239	return 5;
	240	}
	241	else
	242	{
	243	return 6;
	244	}
	245	}
	246
	247
	248	S32 wstring_utf8_length(const LLWString& wstr)
	249	{
	250	S32 len = 0;
	251	for (S32 i = 0; i < (S32)wstr.length(); i++)
	252	{
	253	len += wchar_utf8_length(wstr[i]);
	254	}
	255	return len;
	256	}
	257
	258
	259	LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
	260	{
	261	LLWString wout;
	262
	263	S32 i = 0;
	264	while (i < len)
	265	{
	266	llwchar unichar;
	267	U8 cur_char = utf8str[i];
	268
	269	if (cur_char < 0x80)
	270	{
	271	// Ascii character, just add it
	272	unichar = cur_char;
	273	}
	274	else
	275	{
	276	S32 cont_bytes = 0;
	277	if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
	278	{
	279	unichar = (0x1F&cur_char);
	280	cont_bytes = 1;
	281	}
	282	else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
	283	{
	284	unichar = (0x0F&cur_char);
	285	cont_bytes = 2;
	286	}
	287	else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
	288	{
	289	unichar = (0x07&cur_char);
	290	cont_bytes = 3;
	291	}
	292	else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
	293	{
	294	unichar = (0x03&cur_char);
	295	cont_bytes = 4;
	296	}
	297	else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
	298	{
	299	unichar = (0x01&cur_char);
	300	cont_bytes = 5;
	301	}
	302	else
	303	{
	304	wout += LL_UNKNOWN_CHAR;
	305	++i;
	306	continue;
	307	}
	308
	309	// Check that this character doesn't go past the end of the string
	310	S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
	311	do
	312	{
	313	++i;
	314
	315	cur_char = utf8str[i];
	316	if ( (cur_char >> 6) == 0x2 )
	317	{
	318	unichar <<= 6;
	319	unichar += (0x3F&cur_char);
	320	}
	321	else
	322	{
	323	// Malformed sequence - roll back to look at this as a new char
	324	unichar = LL_UNKNOWN_CHAR;
	325	--i;
	326	break;
	327	}
	328	} while(i < end);
	329
	330	// Handle overlong characters and NULL characters
	331	if ( ((cont_bytes == 1) && (unichar < 0x80))
	332	\|\| ((cont_bytes == 2) && (unichar < 0x800))
	333	\|\| ((cont_bytes == 3) && (unichar < 0x10000))
	334	\|\| ((cont_bytes == 4) && (unichar < 0x200000))
	335	\|\| ((cont_bytes == 5) && (unichar < 0x4000000)) )
	336	{
	337	unichar = LL_UNKNOWN_CHAR;
	338	}
	339	}
	340
	341	wout += unichar;
	342	++i;
	343	}
	344	return wout;
	345	}
	346
	347	LLWString utf8str_to_wstring(const std::string& utf8str)
	348	{
	349	const S32 len = (S32)utf8str.length();
	350	return utf8str_to_wstring(utf8str, len);
	351	}
	352
	353	std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
	354	{
	355	std::string out;
	356
	357	S32 i = 0;
	358	while (i < len)
	359	{
	360	char tchars[8]; /* Flawfinder: ignore */
	361	S32 n = wchar_to_utf8chars(utf32str[i], tchars);
	362	tchars[n] = 0;
	363	out += tchars;
	364	i++;
	365	}
	366	return out;
	367	}
	368
	369	std::string wstring_to_utf8str(const LLWString& utf32str)
	370	{
	371	const S32 len = (S32)utf32str.length();
	372	return wstring_to_utf8str(utf32str, len);
	373	}
	374
	375	std::string utf16str_to_utf8str(const llutf16string& utf16str)
	376	{
	377	return wstring_to_utf8str(utf16str_to_wstring(utf16str));
	378	}
	379
	380	std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
	381	{
	382	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
	383	}
	384
	385
	386	//LLWString wstring_truncate(const LLWString &wstr, const S32 max_len)
	387	//{
	388	// return wstr.substr(0, llmin((S32)wstr.length(), max_len));
	389	//}
	390	//
	391	//
	392	//LLWString wstring_trim(const LLWString &wstr)
	393	//{
	394	// LLWString outstr;
	395	// outstr = wstring_trimhead(wstr);
	396	// outstr = wstring_trimtail(outstr);
	397	// return outstr;
	398	//}
	399	//
	400	//
	401	//LLWString wstring_trimhead(const LLWString &wstr)
	402	//{
	403	// if(wstr.empty())
	404	// {
	405	// return wstr;
	406	// }
	407	//
	408	// S32 i = 0;
	409	// while((i < (S32)wstr.length()) && iswspace(wstr[i]))
	410	// {
	411	// i++;
	412	// }
	413	// return wstr.substr(i, wstr.length() - i);
	414	//}
	415	//
	416	//
	417	//LLWString wstring_trimtail(const LLWString &wstr)
	418	//{
	419	// if(wstr.empty())
	420	// {
	421	// return wstr;
	422	// }
	423	//
	424	// S32 len = (S32)wstr.length();
	425	//
	426	// S32 i = len - 1;
	427	// while (i >= 0 && iswspace(wstr[i]))
	428	// {
	429	// i--;
	430	// }
	431	//
	432	// if (i >= 0)
	433	// {
	434	// return wstr.substr(0, i + 1);
	435	// }
	436	// return wstr;
	437	//}
	438	//
	439	//
	440	//LLWString wstring_copyinto(const LLWString &dest, const LLWString &src, const S32 insert_offset)
	441	//{
	442	// llassert( insert_offset <= (S32)dest.length() );
	443	//
	444	// LLWString out_str = dest.substr(0, insert_offset);
	445	// out_str += src;
	446	// LLWString tail = dest.substr(insert_offset);
	447	// out_str += tail;
	448	//
	449	// return out_str;
	450	//}
	451
	452
	453	//LLWString wstring_detabify(const LLWString &wstr, const S32 num_spaces)
	454	//{
	455	// LLWString out_str;
	456	// // Replace tabs with spaces
	457	// for (S32 i = 0; i < (S32)wstr.length(); i++)
	458	// {
	459	// if (wstr[i] == '\t')
	460	// {
	461	// for (S32 j = 0; j < num_spaces; j++)
	462	// out_str += ' ';
	463	// }
	464	// else
	465	// {
	466	// out_str += wstr[i];
	467	// }
	468	// }
	469	// return out_str;
	470	//}
	471
	472
	473	//LLWString wstring_makeASCII(const LLWString &wstr)
	474	//{
	475	// // Replace non-ASCII chars with replace_char
	476	// LLWString out_str = wstr;
	477	// for (S32 i = 0; i < (S32)out_str.length(); i++)
	478	// {
	479	// if (out_str[i] > 0x7f)
	480	// {
	481	// out_str[i] = LL_UNKNOWN_CHAR;
	482	// }
	483	// }
	484	// return out_str;
	485	//}
	486
	487
	488	//LLWString wstring_substChar(const LLWString &wstr, const llwchar target_char, const llwchar replace_char)
	489	//{
	490	// // Replace all occurrences of target_char with replace_char
	491	// LLWString out_str = wstr;
	492	// for (S32 i = 0; i < (S32)out_str.length(); i++)
	493	// {
	494	// if (out_str[i] == target_char)
	495	// {
	496	// out_str[i] = replace_char;
	497	// }
	498	// }
	499	// return out_str;
	500	//}
	501	//
	502	//
	503	//LLWString wstring_tolower(const LLWString &wstr)
	504	//{
	505	// LLWString out_str = wstr;
	506	// for (S32 i = 0; i < (S32)out_str.length(); i++)
	507	// {
	508	// out_str[i] = towlower(out_str[i]);
	509	// }
	510	// return out_str;
	511	//}
	512	//
	513	//
	514	//LLWString wstring_convert_to_lf(const LLWString &wstr)
	515	//{
	516	// const llwchar CR = 13;
	517	// // Remove carriage returns from string with CRLF
	518	// LLWString out_str;
	519	//
	520	// for (S32 i = 0; i < (S32)wstr.length(); i++)
	521	// {
	522	// if (wstr[i] != CR)
	523	// {
	524	// out_str += wstr[i];
	525	// }
	526	// }
	527	// return out_str;
	528	//}
	529	//
	530	//
	531	//LLWString wstring_convert_to_crlf(const LLWString &wstr)
	532	//{
	533	// const llwchar LF = 10;
	534	// const llwchar CR = 13;
	535	// // Insert a carriage return before every line feed (LF -> CRLF)
	536	// LLWString out_str;
	537	//
	538	// for (S32 i = 0; i < (S32)wstr.length(); i++)
	539	// {
	540	// if (wstr[i] == LF)
	541	// {
	542	// out_str += CR;
	543	// }
	544	// out_str += wstr[i];
	545	// }
	546	// return out_str;
	547	//}
	548
	549
	550	//S32 wstring_compare_insensitive(const LLWString &lhs, const LLWString &rhs)
	551	//{
	552	//
	553	// if (lhs == rhs)
	554	// {
	555	// return 0;
	556	// }
	557	//
	558	// if (lhs.empty())
	559	// {
	560	// return rhs.empty() ? 0 : 1;
	561	// }
	562	//
	563	// if (rhs.empty())
	564	// {
	565	// return -1;
	566	// }
	567	//
	568	//#ifdef LL_LINUX
	569	// // doesn't work because gcc 2.95 doesn't correctly implement c_str(). Sigh...
	570	// llerrs << "wstring_compare_insensitive doesn't work on Linux!" << llendl;
	571	// return 0;
	572	//#else
	573	// LLWString lhs_lower = lhs;
	574	// LLWString::toLower(lhs_lower);
	575	// std::string lhs_lower = wstring_to_utf8str(lhs_lower);
	576	// LLWString rhs_lower = lhs;
	577	// LLWString::toLower(rhs_lower);
	578	// std::string rhs_lower = wstring_to_utf8str(rhs_lower);
	579	//
	580	// return strcmp(lhs_lower.c_str(), rhs_lower.c_str());
	581	//#endif
	582	//}
	583
	584
	585	std::string utf8str_trim(const std::string& utf8str)
	586	{
	587	LLWString wstr = utf8str_to_wstring(utf8str);
	588	LLWString::trim(wstr);
	589	return wstring_to_utf8str(wstr);
	590	}
	591
	592
	593	std::string utf8str_tolower(const std::string& utf8str)
	594	{
	595	LLWString out_str = utf8str_to_wstring(utf8str);
	596	LLWString::toLower(out_str);
	597	return wstring_to_utf8str(out_str);
	598	}
	599
	600
	601	S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
	602	{
	603	LLWString wlhs = utf8str_to_wstring(lhs);
	604	LLWString wrhs = utf8str_to_wstring(rhs);
	605	return LLWString::compareInsensitive(wlhs.c_str(), wrhs.c_str());
	606	}
	607
	608	std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
	609	{
	610	if (0 == max_len)
	611	{
	612	return std::string();
	613	}
	614	if ((S32)utf8str.length() <= max_len)
	615	{
	616	return utf8str;
	617	}
	618	else
	619	{
	620	S32 cur_char = max_len;
	621
	622	// If we're ASCII, we don't need to do anything
	623	if ((U8)utf8str[cur_char] > 0x7f)
	624	{
	625	// If first two bits are (10), it's the tail end of a multibyte char. We need to shift back
	626	// to the first character
	627	while (0x80 == (0xc0 & utf8str[cur_char]))
	628	{
	629	cur_char--;
	630	// Keep moving forward until we hit the first char;
	631	if (cur_char == 0)
	632	{
	633	// Make sure we don't trash memory if we've got a bogus string.
	634	break;
	635	}
	636	}
	637	}
	638	// The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
	639	return utf8str.substr(0, cur_char);
	640	}
	641	}
	642
	643	std::string utf8str_substChar(
	644	const std::string& utf8str,
	645	const llwchar target_char,
	646	const llwchar replace_char)
	647	{
	648	LLWString wstr = utf8str_to_wstring(utf8str);
	649	LLWString::replaceChar(wstr, target_char, replace_char);
	650	//wstr = wstring_substChar(wstr, target_char, replace_char);
	651	return wstring_to_utf8str(wstr);
	652	}
	653
	654	std::string utf8str_makeASCII(const std::string& utf8str)
	655	{
	656	LLWString wstr = utf8str_to_wstring(utf8str);
	657	LLWString::_makeASCII(wstr);
	658	return wstring_to_utf8str(wstr);
	659	}
	660
	661	std::string mbcsstring_makeASCII(const std::string& wstr)
	662	{
	663	// Replace non-ASCII chars with replace_char
	664	std::string out_str = wstr;
	665	for (S32 i = 0; i < (S32)out_str.length(); i++)
	666	{
	667	if ((U8)out_str[i] > 0x7f)
	668	{
	669	out_str[i] = LL_UNKNOWN_CHAR;
	670	}
	671	}
	672	return out_str;
	673	}
	674
	675	S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
	676	{
	677	#if LL_WINDOWS
	678	// in Windows, wide string functions operator on 16-bit strings,
	679	// not the proper 32 bit wide string
	680	return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
	681	#else
	682	return wcscoll(a, b);
	683	#endif
	684	}
	685
	686	namespace LLStringFn
	687	{
	688	void replace_nonprintable(std::basic_string<char>& string, char replacement)
	689	{
	690	const char MIN = 0x20;
	691	std::basic_string<char>::size_type len = string.size();
	692	for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
	693	{
	694	if(string[ii] < MIN)
	695	{
	696	string[ii] = replacement;
	697	}
	698	}
	699	}
	700
	701	void replace_nonprintable(
	702	std::basic_string<llwchar>& string,
	703	llwchar replacement)
	704	{
	705	const llwchar MIN = 0x20;
	706	const llwchar MAX = 0x7f;
	707	std::basic_string<llwchar>::size_type len = string.size();
	708	for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii)
	709	{
	710	if((string[ii] < MIN) \|\| (string[ii] > MAX))
	711	{
	712	string[ii] = replacement;
	713	}
	714	}
	715	}
	716
	717	void replace_nonprintable_and_pipe(std::basic_string<char>& str,
	718	char replacement)
	719	{
	720	const char MIN = 0x20;
	721	const char PIPE = 0x7c;
	722	std::basic_string<char>::size_type len = str.size();
	723	for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
	724	{
	725	if( (str[ii] < MIN) \|\| (str[ii] == PIPE) )
	726	{
	727	str[ii] = replacement;
	728	}
	729	}
	730	}
	731
	732	void replace_nonprintable_and_pipe(std::basic_string<llwchar>& str,
	733	llwchar replacement)
	734	{
	735	const llwchar MIN = 0x20;
	736	const llwchar MAX = 0x7f;
	737	const llwchar PIPE = 0x7c;
	738	std::basic_string<llwchar>::size_type len = str.size();
	739	for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii)
	740	{
	741	if( (str[ii] < MIN) \|\| (str[ii] > MAX) \|\| (str[ii] == PIPE) )
	742	{
	743	str[ii] = replacement;
	744	}
	745	}
	746	}
	747	}
	748
	749
	750	////////////////////////////////////////////////////////////
	751	// Testing
	752
	753	#ifdef _DEBUG
	754
	755	template<class T>
	756	void LLStringBase<T>::testHarness()
	757	{
	758	LLString s1;
	759
	760	llassert( s1.c_str() == NULL );
	761	llassert( s1.size() == 0 );
	762	llassert( s1.empty() );
	763
	764	LLString s2( "hello");
	765	llassert( !strcmp( s2.c_str(), "hello" ) );
	766	llassert( s2.size() == 5 );
	767	llassert( !s2.empty() );
	768	LLString s3( s2 );
	769
	770	llassert( "hello" == s2 );
	771	llassert( s2 == "hello" );
	772	llassert( s2 > "gello" );
	773	llassert( "gello" < s2 );
	774	llassert( "gello" != s2 );
	775	llassert( s2 != "gello" );
	776
	777	LLString s4 = s2;
	778	llassert( !s4.empty() );
	779	s4.empty();
	780	llassert( s4.empty() );
	781
	782	LLString s5("");
	783	llassert( s5.empty() );
	784
	785	llassert( isValidIndex(s5, 0) );
	786	llassert( !isValidIndex(s5, 1) );
	787
	788	s3 = s2;
	789	s4 = "hello again";
	790
	791	s4 += "!";
	792	s4 += s4;
	793	llassert( s4 == "hello again!hello again!" );
	794
	795
	796	LLString s6 = s2 + " " + s2;
	797	LLString s7 = s6;
	798	llassert( s6 == s7 );
	799	llassert( !( s6 != s7) );
	800	llassert( !(s6 < s7) );
	801	llassert( !(s6 > s7) );
	802
	803	llassert( !(s6 == "hi"));
	804	llassert( s6 == "hello hello");
	805	llassert( s6 < "hi");
	806
	807	llassert( s6[1] == 'e' );
	808	s6[1] = 'f';
	809	llassert( s6[1] == 'f' );
	810
	811	s2.erase( 4, 1 );
	812	llassert( s2 == "hell");
	813	s2.insert( 0, 'y' );
	814	llassert( s2 == "yhell");
	815	s2.erase( 1, 3 );
	816	llassert( s2 == "yl");
	817	s2.insert( 1, "awn, don't yel");
	818	llassert( s2 == "yawn, don't yell");
	819
	820	LLString s8 = s2.substr( 6, 5 );
	821	llassert( s8 == "don't" );
	822
	823	LLString s9 = " \t\ntest \t\t\n ";
	824	trim(s9);
	825	llassert( s9 == "test" );
	826
	827	s8 = "abc123&*(ABC";
	828
	829	s9 = s8;
	830	toUpper(s9);
	831	llassert( s9 == "ABC123&*(ABC" );
	832
	833	s9 = s8;
	834	toLower(s9);
	835	llassert( s9 == "abc123&*(abc" );
	836
	837
	838	LLString s10( 10, 'x' );
	839	llassert( s10 == "xxxxxxxxxx" );
	840
	841	LLString s11( "monkey in the middle", 7, 2 );
	842	llassert( s11 == "in" );
	843
	844	LLString s12; //empty
	845	s12 += "foo";
	846	llassert( s12 == "foo" );
	847
	848	LLString s13; //empty
	849	s13 += 'f';
	850	llassert( s13 == "f" );
	851	}
	852
	853
	854	#endif // _DEBUG