diff options
Diffstat (limited to 'linden/indra/llcommon/llstring.cpp')
-rw-r--r-- | linden/indra/llcommon/llstring.cpp | 854 |
1 files changed, 854 insertions, 0 deletions
diff --git a/linden/indra/llcommon/llstring.cpp b/linden/indra/llcommon/llstring.cpp new file mode 100644 index 0000000..5cb42cc --- /dev/null +++ b/linden/indra/llcommon/llstring.cpp | |||
@@ -0,0 +1,854 @@ | |||
1 | /** | ||
2 | * @file llstring.cpp | ||
3 | * @brief String utility functions and the LLString class. | ||
4 | * | ||
5 | * Copyright (c) 2001-2007, Linden Research, Inc. | ||
6 | * | ||
7 | * The source code in this file ("Source Code") is provided by Linden Lab | ||
8 | * to you under the terms of the GNU General Public License, version 2.0 | ||
9 | * ("GPL"), unless you have obtained a separate licensing agreement | ||
10 | * ("Other License"), formally executed by you and Linden Lab. Terms of | ||
11 | * the GPL can be found in doc/GPL-license.txt in this distribution, or | ||
12 | * online at http://secondlife.com/developers/opensource/gplv2 | ||
13 | * | ||
14 | * There are special exceptions to the terms and conditions of the GPL as | ||
15 | * it is applied to this Source Code. View the full text of the exception | ||
16 | * in the file doc/FLOSS-exception.txt in this software distribution, or | ||
17 | * online at http://secondlife.com/developers/opensource/flossexception | ||
18 | * | ||
19 | * By copying, modifying or distributing this software, you acknowledge | ||
20 | * that you have read and understood your obligations described above, | ||
21 | * and agree to abide by those obligations. | ||
22 | * | ||
23 | * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO | ||
24 | * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, | ||
25 | * COMPLETENESS OR PERFORMANCE. | ||
26 | */ | ||
27 | |||
28 | #include "linden_common.h" | ||
29 | |||
30 | #include "llstring.h" | ||
31 | #include "llerror.h" | ||
32 | |||
// Build a std::string from a C string pointer that may be null.
// A null pointer yields an empty string instead of undefined behavior.
std::string ll_safe_string(const char* in)
{
	return in ? std::string(in) : std::string();
}
38 | |||
39 | U8 hex_as_nybble(char hex) | ||
40 | { | ||
41 | if((hex >= '0') && (hex <= '9')) | ||
42 | { | ||
43 | return (U8)(hex - '0'); | ||
44 | } | ||
45 | else if((hex >= 'a') && (hex <='f')) | ||
46 | { | ||
47 | return (U8)(10 + hex - 'a'); | ||
48 | } | ||
49 | else if((hex >= 'A') && (hex <='F')) | ||
50 | { | ||
51 | return (U8)(10 + hex - 'A'); | ||
52 | } | ||
53 | return 0; // uh - oh, not hex any more... | ||
54 | } | ||
55 | |||
56 | |||
57 | // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c | ||
58 | // for the reference Unicode implementation. The code below does not match it | ||
59 | // because it was written before that reference was found. | ||
60 | |||
61 | |||
62 | std::ostream& operator<<(std::ostream &s, const LLWString &wstr) | ||
63 | { | ||
64 | std::string utf8_str = wstring_to_utf8str(wstr); | ||
65 | s << utf8_str; | ||
66 | return s; | ||
67 | } | ||
68 | |||
69 | std::string rawstr_to_utf8(const std::string& raw) | ||
70 | { | ||
71 | LLWString wstr(utf8str_to_wstring(raw)); | ||
72 | return wstring_to_utf8str(wstr); | ||
73 | } | ||
74 | |||
75 | S32 wchar_to_utf8chars(llwchar in_char, char* outchars) | ||
76 | { | ||
77 | U32 cur_char = (U32)in_char; | ||
78 | char* base = outchars; | ||
79 | if (cur_char < 0x80) | ||
80 | { | ||
81 | *outchars++ = (U8)cur_char; | ||
82 | } | ||
83 | else if (cur_char < 0x800) | ||
84 | { | ||
85 | *outchars++ = 0xC0 | (cur_char >> 6); | ||
86 | *outchars++ = 0x80 | (cur_char & 0x3F); | ||
87 | } | ||
88 | else if (cur_char < 0x10000) | ||
89 | { | ||
90 | *outchars++ = 0xE0 | (cur_char >> 12); | ||
91 | *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); | ||
92 | *outchars++ = 0x80 | (cur_char & 0x3F); | ||
93 | } | ||
94 | else if (cur_char < 0x200000) | ||
95 | { | ||
96 | *outchars++ = 0xF0 | (cur_char >> 18); | ||
97 | *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); | ||
98 | *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); | ||
99 | *outchars++ = 0x80 | cur_char & 0x3F; | ||
100 | } | ||
101 | else if (cur_char < 0x4000000) | ||
102 | { | ||
103 | *outchars++ = 0xF8 | (cur_char >> 24); | ||
104 | *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); | ||
105 | *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); | ||
106 | *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); | ||
107 | *outchars++ = 0x80 | cur_char & 0x3F; | ||
108 | } | ||
109 | else if (cur_char < 0x80000000) | ||
110 | { | ||
111 | *outchars++ = 0xFC | (cur_char >> 30); | ||
112 | *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F); | ||
113 | *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); | ||
114 | *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); | ||
115 | *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); | ||
116 | *outchars++ = 0x80 | cur_char & 0x3F; | ||
117 | } | ||
118 | else | ||
119 | { | ||
120 | llwarns << "Invalid Unicode character " << cur_char << "!" << llendl; | ||
121 | *outchars++ = LL_UNKNOWN_CHAR; | ||
122 | } | ||
123 | return outchars - base; | ||
124 | } | ||
125 | |||
126 | S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar) | ||
127 | { | ||
128 | const U16* base = inchars; | ||
129 | U16 cur_char = *inchars++; | ||
130 | llwchar char32 = cur_char; | ||
131 | if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF)) | ||
132 | { | ||
133 | // Surrogates | ||
134 | char32 = ((llwchar)(cur_char - 0xD800)) << 10; | ||
135 | cur_char = *inchars++; | ||
136 | char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL; | ||
137 | } | ||
138 | else | ||
139 | { | ||
140 | char32 = (llwchar)cur_char; | ||
141 | } | ||
142 | *outchar = char32; | ||
143 | return inchars - base; | ||
144 | } | ||
145 | |||
146 | S32 utf16chars_to_utf8chars(const U16* inchars, char* outchars, S32* nchars8p) | ||
147 | { | ||
148 | // Get 32 bit char32 | ||
149 | llwchar char32; | ||
150 | S32 nchars16 = utf16chars_to_wchar(inchars, &char32); | ||
151 | // Convert to utf8 | ||
152 | S32 nchars8 = wchar_to_utf8chars(char32, outchars); | ||
153 | if (nchars8p) | ||
154 | { | ||
155 | *nchars8p = nchars8; | ||
156 | } | ||
157 | return nchars16; | ||
158 | } | ||
159 | |||
160 | llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) | ||
161 | { | ||
162 | llutf16string out; | ||
163 | |||
164 | S32 i = 0; | ||
165 | while (i < len) | ||
166 | { | ||
167 | U32 cur_char = utf32str[i]; | ||
168 | if (cur_char > 0xFFFF) | ||
169 | { | ||
170 | out += (0xD7C0 + (cur_char >> 10)); | ||
171 | out += (0xDC00 | (cur_char & 0x3FF)); | ||
172 | } | ||
173 | else | ||
174 | { | ||
175 | out += cur_char; | ||
176 | } | ||
177 | i++; | ||
178 | } | ||
179 | return out; | ||
180 | } | ||
181 | |||
182 | llutf16string wstring_to_utf16str(const LLWString &utf32str) | ||
183 | { | ||
184 | const S32 len = (S32)utf32str.length(); | ||
185 | return wstring_to_utf16str(utf32str, len); | ||
186 | } | ||
187 | |||
188 | llutf16string utf8str_to_utf16str ( const LLString& utf8str ) | ||
189 | { | ||
190 | LLWString wstr = utf8str_to_wstring ( utf8str ); | ||
191 | return wstring_to_utf16str ( wstr ); | ||
192 | } | ||
193 | |||
194 | |||
195 | LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) | ||
196 | { | ||
197 | LLWString wout; | ||
198 | |||
199 | S32 i = 0; | ||
200 | // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): | ||
201 | const U16* chars16 = &(*(utf16str.begin())); | ||
202 | while (i < len) | ||
203 | { | ||
204 | llwchar cur_char; | ||
205 | i += utf16chars_to_wchar(chars16+i, &cur_char); | ||
206 | wout += cur_char; | ||
207 | } | ||
208 | return wout; | ||
209 | } | ||
210 | |||
211 | LLWString utf16str_to_wstring(const llutf16string &utf16str) | ||
212 | { | ||
213 | const S32 len = (S32)utf16str.length(); | ||
214 | return utf16str_to_wstring(utf16str, len); | ||
215 | } | ||
216 | |||
217 | S32 wchar_utf8_length(const llwchar wc) | ||
218 | { | ||
219 | if (wc < 0x80) | ||
220 | { | ||
221 | // This case will also catch negative values which are | ||
222 | // technically invalid. | ||
223 | return 1; | ||
224 | } | ||
225 | else if (wc < 0x800) | ||
226 | { | ||
227 | return 2; | ||
228 | } | ||
229 | else if (wc < 0x10000) | ||
230 | { | ||
231 | return 3; | ||
232 | } | ||
233 | else if (wc < 0x200000) | ||
234 | { | ||
235 | return 4; | ||
236 | } | ||
237 | else if (wc < 0x4000000) | ||
238 | { | ||
239 | return 5; | ||
240 | } | ||
241 | else | ||
242 | { | ||
243 | return 6; | ||
244 | } | ||
245 | } | ||
246 | |||
247 | |||
248 | S32 wstring_utf8_length(const LLWString& wstr) | ||
249 | { | ||
250 | S32 len = 0; | ||
251 | for (S32 i = 0; i < (S32)wstr.length(); i++) | ||
252 | { | ||
253 | len += wchar_utf8_length(wstr[i]); | ||
254 | } | ||
255 | return len; | ||
256 | } | ||
257 | |||
258 | |||
259 | LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) | ||
260 | { | ||
261 | LLWString wout; | ||
262 | |||
263 | S32 i = 0; | ||
264 | while (i < len) | ||
265 | { | ||
266 | llwchar unichar; | ||
267 | U8 cur_char = utf8str[i]; | ||
268 | |||
269 | if (cur_char < 0x80) | ||
270 | { | ||
271 | // Ascii character, just add it | ||
272 | unichar = cur_char; | ||
273 | } | ||
274 | else | ||
275 | { | ||
276 | S32 cont_bytes = 0; | ||
277 | if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32 | ||
278 | { | ||
279 | unichar = (0x1F&cur_char); | ||
280 | cont_bytes = 1; | ||
281 | } | ||
282 | else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32 | ||
283 | { | ||
284 | unichar = (0x0F&cur_char); | ||
285 | cont_bytes = 2; | ||
286 | } | ||
287 | else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32 | ||
288 | { | ||
289 | unichar = (0x07&cur_char); | ||
290 | cont_bytes = 3; | ||
291 | } | ||
292 | else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32 | ||
293 | { | ||
294 | unichar = (0x03&cur_char); | ||
295 | cont_bytes = 4; | ||
296 | } | ||
297 | else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32 | ||
298 | { | ||
299 | unichar = (0x01&cur_char); | ||
300 | cont_bytes = 5; | ||
301 | } | ||
302 | else | ||
303 | { | ||
304 | wout += LL_UNKNOWN_CHAR; | ||
305 | ++i; | ||
306 | continue; | ||
307 | } | ||
308 | |||
309 | // Check that this character doesn't go past the end of the string | ||
310 | S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes); | ||
311 | do | ||
312 | { | ||
313 | ++i; | ||
314 | |||
315 | cur_char = utf8str[i]; | ||
316 | if ( (cur_char >> 6) == 0x2 ) | ||
317 | { | ||
318 | unichar <<= 6; | ||
319 | unichar += (0x3F&cur_char); | ||
320 | } | ||
321 | else | ||
322 | { | ||
323 | // Malformed sequence - roll back to look at this as a new char | ||
324 | unichar = LL_UNKNOWN_CHAR; | ||
325 | --i; | ||
326 | break; | ||
327 | } | ||
328 | } while(i < end); | ||
329 | |||
330 | // Handle overlong characters and NULL characters | ||
331 | if ( ((cont_bytes == 1) && (unichar < 0x80)) | ||
332 | || ((cont_bytes == 2) && (unichar < 0x800)) | ||
333 | || ((cont_bytes == 3) && (unichar < 0x10000)) | ||
334 | || ((cont_bytes == 4) && (unichar < 0x200000)) | ||
335 | || ((cont_bytes == 5) && (unichar < 0x4000000)) ) | ||
336 | { | ||
337 | unichar = LL_UNKNOWN_CHAR; | ||
338 | } | ||
339 | } | ||
340 | |||
341 | wout += unichar; | ||
342 | ++i; | ||
343 | } | ||
344 | return wout; | ||
345 | } | ||
346 | |||
347 | LLWString utf8str_to_wstring(const std::string& utf8str) | ||
348 | { | ||
349 | const S32 len = (S32)utf8str.length(); | ||
350 | return utf8str_to_wstring(utf8str, len); | ||
351 | } | ||
352 | |||
353 | std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) | ||
354 | { | ||
355 | std::string out; | ||
356 | |||
357 | S32 i = 0; | ||
358 | while (i < len) | ||
359 | { | ||
360 | char tchars[8]; /* Flawfinder: ignore */ | ||
361 | S32 n = wchar_to_utf8chars(utf32str[i], tchars); | ||
362 | tchars[n] = 0; | ||
363 | out += tchars; | ||
364 | i++; | ||
365 | } | ||
366 | return out; | ||
367 | } | ||
368 | |||
369 | std::string wstring_to_utf8str(const LLWString& utf32str) | ||
370 | { | ||
371 | const S32 len = (S32)utf32str.length(); | ||
372 | return wstring_to_utf8str(utf32str, len); | ||
373 | } | ||
374 | |||
375 | std::string utf16str_to_utf8str(const llutf16string& utf16str) | ||
376 | { | ||
377 | return wstring_to_utf8str(utf16str_to_wstring(utf16str)); | ||
378 | } | ||
379 | |||
380 | std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len) | ||
381 | { | ||
382 | return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len); | ||
383 | } | ||
384 | |||
385 | |||
386 | //LLWString wstring_truncate(const LLWString &wstr, const S32 max_len) | ||
387 | //{ | ||
388 | // return wstr.substr(0, llmin((S32)wstr.length(), max_len)); | ||
389 | //} | ||
390 | // | ||
391 | // | ||
392 | //LLWString wstring_trim(const LLWString &wstr) | ||
393 | //{ | ||
394 | // LLWString outstr; | ||
395 | // outstr = wstring_trimhead(wstr); | ||
396 | // outstr = wstring_trimtail(outstr); | ||
397 | // return outstr; | ||
398 | //} | ||
399 | // | ||
400 | // | ||
401 | //LLWString wstring_trimhead(const LLWString &wstr) | ||
402 | //{ | ||
403 | // if(wstr.empty()) | ||
404 | // { | ||
405 | // return wstr; | ||
406 | // } | ||
407 | // | ||
408 | // S32 i = 0; | ||
409 | // while((i < (S32)wstr.length()) && iswspace(wstr[i])) | ||
410 | // { | ||
411 | // i++; | ||
412 | // } | ||
413 | // return wstr.substr(i, wstr.length() - i); | ||
414 | //} | ||
415 | // | ||
416 | // | ||
417 | //LLWString wstring_trimtail(const LLWString &wstr) | ||
418 | //{ | ||
419 | // if(wstr.empty()) | ||
420 | // { | ||
421 | // return wstr; | ||
422 | // } | ||
423 | // | ||
424 | // S32 len = (S32)wstr.length(); | ||
425 | // | ||
426 | // S32 i = len - 1; | ||
427 | // while (i >= 0 && iswspace(wstr[i])) | ||
428 | // { | ||
429 | // i--; | ||
430 | // } | ||
431 | // | ||
432 | // if (i >= 0) | ||
433 | // { | ||
434 | // return wstr.substr(0, i + 1); | ||
435 | // } | ||
436 | // return wstr; | ||
437 | //} | ||
438 | // | ||
439 | // | ||
440 | //LLWString wstring_copyinto(const LLWString &dest, const LLWString &src, const S32 insert_offset) | ||
441 | //{ | ||
442 | // llassert( insert_offset <= (S32)dest.length() ); | ||
443 | // | ||
444 | // LLWString out_str = dest.substr(0, insert_offset); | ||
445 | // out_str += src; | ||
446 | // LLWString tail = dest.substr(insert_offset); | ||
447 | // out_str += tail; | ||
448 | // | ||
449 | // return out_str; | ||
450 | //} | ||
451 | |||
452 | |||
453 | //LLWString wstring_detabify(const LLWString &wstr, const S32 num_spaces) | ||
454 | //{ | ||
455 | // LLWString out_str; | ||
456 | // // Replace tabs with spaces | ||
457 | // for (S32 i = 0; i < (S32)wstr.length(); i++) | ||
458 | // { | ||
459 | // if (wstr[i] == '\t') | ||
460 | // { | ||
461 | // for (S32 j = 0; j < num_spaces; j++) | ||
462 | // out_str += ' '; | ||
463 | // } | ||
464 | // else | ||
465 | // { | ||
466 | // out_str += wstr[i]; | ||
467 | // } | ||
468 | // } | ||
469 | // return out_str; | ||
470 | //} | ||
471 | |||
472 | |||
473 | //LLWString wstring_makeASCII(const LLWString &wstr) | ||
474 | //{ | ||
475 | // // Replace non-ASCII chars with replace_char | ||
476 | // LLWString out_str = wstr; | ||
477 | // for (S32 i = 0; i < (S32)out_str.length(); i++) | ||
478 | // { | ||
479 | // if (out_str[i] > 0x7f) | ||
480 | // { | ||
481 | // out_str[i] = LL_UNKNOWN_CHAR; | ||
482 | // } | ||
483 | // } | ||
484 | // return out_str; | ||
485 | //} | ||
486 | |||
487 | |||
488 | //LLWString wstring_substChar(const LLWString &wstr, const llwchar target_char, const llwchar replace_char) | ||
489 | //{ | ||
490 | // // Replace all occurrences of target_char with replace_char | ||
491 | // LLWString out_str = wstr; | ||
492 | // for (S32 i = 0; i < (S32)out_str.length(); i++) | ||
493 | // { | ||
494 | // if (out_str[i] == target_char) | ||
495 | // { | ||
496 | // out_str[i] = replace_char; | ||
497 | // } | ||
498 | // } | ||
499 | // return out_str; | ||
500 | //} | ||
501 | // | ||
502 | // | ||
503 | //LLWString wstring_tolower(const LLWString &wstr) | ||
504 | //{ | ||
505 | // LLWString out_str = wstr; | ||
506 | // for (S32 i = 0; i < (S32)out_str.length(); i++) | ||
507 | // { | ||
508 | // out_str[i] = towlower(out_str[i]); | ||
509 | // } | ||
510 | // return out_str; | ||
511 | //} | ||
512 | // | ||
513 | // | ||
514 | //LLWString wstring_convert_to_lf(const LLWString &wstr) | ||
515 | //{ | ||
516 | // const llwchar CR = 13; | ||
517 | // // Remove carriage returns from string with CRLF | ||
518 | // LLWString out_str; | ||
519 | // | ||
520 | // for (S32 i = 0; i < (S32)wstr.length(); i++) | ||
521 | // { | ||
522 | // if (wstr[i] != CR) | ||
523 | // { | ||
524 | // out_str += wstr[i]; | ||
525 | // } | ||
526 | // } | ||
527 | // return out_str; | ||
528 | //} | ||
529 | // | ||
530 | // | ||
531 | //LLWString wstring_convert_to_crlf(const LLWString &wstr) | ||
532 | //{ | ||
533 | // const llwchar LF = 10; | ||
534 | // const llwchar CR = 13; | ||
535 | // // Insert a carriage return before every line feed | ||
536 | // LLWString out_str; | ||
537 | // | ||
538 | // for (S32 i = 0; i < (S32)wstr.length(); i++) | ||
539 | // { | ||
540 | // if (wstr[i] == LF) | ||
541 | // { | ||
542 | // out_str += CR; | ||
543 | // } | ||
544 | // out_str += wstr[i]; | ||
545 | // } | ||
546 | // return out_str; | ||
547 | //} | ||
548 | |||
549 | |||
550 | //S32 wstring_compare_insensitive(const LLWString &lhs, const LLWString &rhs) | ||
551 | //{ | ||
552 | // | ||
553 | // if (lhs == rhs) | ||
554 | // { | ||
555 | // return 0; | ||
556 | // } | ||
557 | // | ||
558 | // if (lhs.empty()) | ||
559 | // { | ||
560 | // return rhs.empty() ? 0 : 1; | ||
561 | // } | ||
562 | // | ||
563 | // if (rhs.empty()) | ||
564 | // { | ||
565 | // return -1; | ||
566 | // } | ||
567 | // | ||
568 | //#ifdef LL_LINUX | ||
569 | // // doesn't work because gcc 2.95 doesn't correctly implement c_str(). Sigh... | ||
570 | // llerrs << "wstring_compare_insensitive doesn't work on Linux!" << llendl; | ||
571 | // return 0; | ||
572 | //#else | ||
573 | // LLWString lhs_lower = lhs; | ||
574 | // LLWString::toLower(lhs_lower); | ||
575 | // std::string lhs_lower = wstring_to_utf8str(lhs_lower); | ||
576 | // LLWString rhs_lower = lhs; | ||
577 | // LLWString::toLower(rhs_lower); | ||
578 | // std::string rhs_lower = wstring_to_utf8str(rhs_lower); | ||
579 | // | ||
580 | // return strcmp(lhs_lower.c_str(), rhs_lower.c_str()); | ||
581 | //#endif | ||
582 | //} | ||
583 | |||
584 | |||
585 | std::string utf8str_trim(const std::string& utf8str) | ||
586 | { | ||
587 | LLWString wstr = utf8str_to_wstring(utf8str); | ||
588 | LLWString::trim(wstr); | ||
589 | return wstring_to_utf8str(wstr); | ||
590 | } | ||
591 | |||
592 | |||
593 | std::string utf8str_tolower(const std::string& utf8str) | ||
594 | { | ||
595 | LLWString out_str = utf8str_to_wstring(utf8str); | ||
596 | LLWString::toLower(out_str); | ||
597 | return wstring_to_utf8str(out_str); | ||
598 | } | ||
599 | |||
600 | |||
601 | S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs) | ||
602 | { | ||
603 | LLWString wlhs = utf8str_to_wstring(lhs); | ||
604 | LLWString wrhs = utf8str_to_wstring(rhs); | ||
605 | return LLWString::compareInsensitive(wlhs.c_str(), wrhs.c_str()); | ||
606 | } | ||
607 | |||
608 | std::string utf8str_truncate(const std::string& utf8str, const S32 max_len) | ||
609 | { | ||
610 | if (0 == max_len) | ||
611 | { | ||
612 | return std::string(); | ||
613 | } | ||
614 | if ((S32)utf8str.length() <= max_len) | ||
615 | { | ||
616 | return utf8str; | ||
617 | } | ||
618 | else | ||
619 | { | ||
620 | S32 cur_char = max_len; | ||
621 | |||
622 | // If we're ASCII, we don't need to do anything | ||
623 | if ((U8)utf8str[cur_char] > 0x7f) | ||
624 | { | ||
625 | // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back | ||
626 | // to the first character | ||
627 | while (0x80 == (0xc0 & utf8str[cur_char])) | ||
628 | { | ||
629 | cur_char--; | ||
630 | // Keep moving forward until we hit the first char; | ||
631 | if (cur_char == 0) | ||
632 | { | ||
633 | // Make sure we don't trash memory if we've got a bogus string. | ||
634 | break; | ||
635 | } | ||
636 | } | ||
637 | } | ||
638 | // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars | ||
639 | return utf8str.substr(0, cur_char); | ||
640 | } | ||
641 | } | ||
642 | |||
643 | std::string utf8str_substChar( | ||
644 | const std::string& utf8str, | ||
645 | const llwchar target_char, | ||
646 | const llwchar replace_char) | ||
647 | { | ||
648 | LLWString wstr = utf8str_to_wstring(utf8str); | ||
649 | LLWString::replaceChar(wstr, target_char, replace_char); | ||
650 | //wstr = wstring_substChar(wstr, target_char, replace_char); | ||
651 | return wstring_to_utf8str(wstr); | ||
652 | } | ||
653 | |||
654 | std::string utf8str_makeASCII(const std::string& utf8str) | ||
655 | { | ||
656 | LLWString wstr = utf8str_to_wstring(utf8str); | ||
657 | LLWString::_makeASCII(wstr); | ||
658 | return wstring_to_utf8str(wstr); | ||
659 | } | ||
660 | |||
661 | std::string mbcsstring_makeASCII(const std::string& wstr) | ||
662 | { | ||
663 | // Replace non-ASCII chars with replace_char | ||
664 | std::string out_str = wstr; | ||
665 | for (S32 i = 0; i < (S32)out_str.length(); i++) | ||
666 | { | ||
667 | if ((U8)out_str[i] > 0x7f) | ||
668 | { | ||
669 | out_str[i] = LL_UNKNOWN_CHAR; | ||
670 | } | ||
671 | } | ||
672 | return out_str; | ||
673 | } | ||
674 | |||
675 | S32 LLStringOps::collate(const llwchar* a, const llwchar* b) | ||
676 | { | ||
677 | #if LL_WINDOWS | ||
678 | // in Windows, wide string functions operator on 16-bit strings, | ||
679 | // not the proper 32 bit wide string | ||
680 | return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str()); | ||
681 | #else | ||
682 | return wcscoll(a, b); | ||
683 | #endif | ||
684 | } | ||
685 | |||
686 | namespace LLStringFn | ||
687 | { | ||
688 | void replace_nonprintable(std::basic_string<char>& string, char replacement) | ||
689 | { | ||
690 | const char MIN = 0x20; | ||
691 | std::basic_string<char>::size_type len = string.size(); | ||
692 | for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) | ||
693 | { | ||
694 | if(string[ii] < MIN) | ||
695 | { | ||
696 | string[ii] = replacement; | ||
697 | } | ||
698 | } | ||
699 | } | ||
700 | |||
701 | void replace_nonprintable( | ||
702 | std::basic_string<llwchar>& string, | ||
703 | llwchar replacement) | ||
704 | { | ||
705 | const llwchar MIN = 0x20; | ||
706 | const llwchar MAX = 0x7f; | ||
707 | std::basic_string<llwchar>::size_type len = string.size(); | ||
708 | for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii) | ||
709 | { | ||
710 | if((string[ii] < MIN) || (string[ii] > MAX)) | ||
711 | { | ||
712 | string[ii] = replacement; | ||
713 | } | ||
714 | } | ||
715 | } | ||
716 | |||
717 | void replace_nonprintable_and_pipe(std::basic_string<char>& str, | ||
718 | char replacement) | ||
719 | { | ||
720 | const char MIN = 0x20; | ||
721 | const char PIPE = 0x7c; | ||
722 | std::basic_string<char>::size_type len = str.size(); | ||
723 | for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) | ||
724 | { | ||
725 | if( (str[ii] < MIN) || (str[ii] == PIPE) ) | ||
726 | { | ||
727 | str[ii] = replacement; | ||
728 | } | ||
729 | } | ||
730 | } | ||
731 | |||
732 | void replace_nonprintable_and_pipe(std::basic_string<llwchar>& str, | ||
733 | llwchar replacement) | ||
734 | { | ||
735 | const llwchar MIN = 0x20; | ||
736 | const llwchar MAX = 0x7f; | ||
737 | const llwchar PIPE = 0x7c; | ||
738 | std::basic_string<llwchar>::size_type len = str.size(); | ||
739 | for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii) | ||
740 | { | ||
741 | if( (str[ii] < MIN) || (str[ii] > MAX) || (str[ii] == PIPE) ) | ||
742 | { | ||
743 | str[ii] = replacement; | ||
744 | } | ||
745 | } | ||
746 | } | ||
747 | } | ||
748 | |||
749 | |||
750 | //////////////////////////////////////////////////////////// | ||
751 | // Testing | ||
752 | |||
753 | #ifdef _DEBUG | ||
754 | |||
755 | template<class T> | ||
756 | void LLStringBase<T>::testHarness() | ||
757 | { | ||
758 | LLString s1; | ||
759 | |||
760 | llassert( s1.c_str() == NULL ); | ||
761 | llassert( s1.size() == 0 ); | ||
762 | llassert( s1.empty() ); | ||
763 | |||
764 | LLString s2( "hello"); | ||
765 | llassert( !strcmp( s2.c_str(), "hello" ) ); | ||
766 | llassert( s2.size() == 5 ); | ||
767 | llassert( !s2.empty() ); | ||
768 | LLString s3( s2 ); | ||
769 | |||
770 | llassert( "hello" == s2 ); | ||
771 | llassert( s2 == "hello" ); | ||
772 | llassert( s2 > "gello" ); | ||
773 | llassert( "gello" < s2 ); | ||
774 | llassert( "gello" != s2 ); | ||
775 | llassert( s2 != "gello" ); | ||
776 | |||
777 | LLString s4 = s2; | ||
778 | llassert( !s4.empty() ); | ||
779 | s4.empty(); | ||
780 | llassert( s4.empty() ); | ||
781 | |||
782 | LLString s5(""); | ||
783 | llassert( s5.empty() ); | ||
784 | |||
785 | llassert( isValidIndex(s5, 0) ); | ||
786 | llassert( !isValidIndex(s5, 1) ); | ||
787 | |||
788 | s3 = s2; | ||
789 | s4 = "hello again"; | ||
790 | |||
791 | s4 += "!"; | ||
792 | s4 += s4; | ||
793 | llassert( s4 == "hello again!hello again!" ); | ||
794 | |||
795 | |||
796 | LLString s6 = s2 + " " + s2; | ||
797 | LLString s7 = s6; | ||
798 | llassert( s6 == s7 ); | ||
799 | llassert( !( s6 != s7) ); | ||
800 | llassert( !(s6 < s7) ); | ||
801 | llassert( !(s6 > s7) ); | ||
802 | |||
803 | llassert( !(s6 == "hi")); | ||
804 | llassert( s6 == "hello hello"); | ||
805 | llassert( s6 < "hi"); | ||
806 | |||
807 | llassert( s6[1] == 'e' ); | ||
808 | s6[1] = 'f'; | ||
809 | llassert( s6[1] == 'f' ); | ||
810 | |||
811 | s2.erase( 4, 1 ); | ||
812 | llassert( s2 == "hell"); | ||
813 | s2.insert( 0, 'y' ); | ||
814 | llassert( s2 == "yhell"); | ||
815 | s2.erase( 1, 3 ); | ||
816 | llassert( s2 == "yl"); | ||
817 | s2.insert( 1, "awn, don't yel"); | ||
818 | llassert( s2 == "yawn, don't yell"); | ||
819 | |||
820 | LLString s8 = s2.substr( 6, 5 ); | ||
821 | llassert( s8 == "don't" ); | ||
822 | |||
823 | LLString s9 = " \t\ntest \t\t\n "; | ||
824 | trim(s9); | ||
825 | llassert( s9 == "test" ); | ||
826 | |||
827 | s8 = "abc123&*(ABC"; | ||
828 | |||
829 | s9 = s8; | ||
830 | toUpper(s9); | ||
831 | llassert( s9 == "ABC123&*(ABC" ); | ||
832 | |||
833 | s9 = s8; | ||
834 | toLower(s9); | ||
835 | llassert( s9 == "abc123&*(abc" ); | ||
836 | |||
837 | |||
838 | LLString s10( 10, 'x' ); | ||
839 | llassert( s10 == "xxxxxxxxxx" ); | ||
840 | |||
841 | LLString s11( "monkey in the middle", 7, 2 ); | ||
842 | llassert( s11 == "in" ); | ||
843 | |||
844 | LLString s12; //empty | ||
845 | s12 += "foo"; | ||
846 | llassert( s12 == "foo" ); | ||
847 | |||
848 | LLString s13; //empty | ||
849 | s13 += 'f'; | ||
850 | llassert( s13 == "f" ); | ||
851 | } | ||
852 | |||
853 | |||
854 | #endif // _DEBUG | ||